In [1]:
# 📦 General-purpose and OS handling
import os
import random
import numpy as np
import pandas as pd
from collections import Counter
# 📈 Visualization & plotting
import matplotlib.pyplot as plt
import seaborn as sns
# 🖼️ Image processing and feature extraction
from PIL import Image
import cv2
from skimage.feature import local_binary_pattern, hog
# 🔥 PyTorch for deep learning
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
# 🧠 Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# ⚙️ Scikit-learn for preprocessing and evaluation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# 📈 Dimensionality Reduction
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# 🔌 Optional: UMAP
try:
import umap
except ImportError:
print("⚠️ UMAP not found. Run `pip install umap-learn` to enable.")
# 🔍 Explainability (Week 6)
try:
from torchcam.methods import GradCAM
from torchcam.utils import overlay_mask
except ImportError:
print("⚠️ torchcam not found. Run `pip install torchcam` to enable Grad-CAM support.")
# 🌐 Dashboards (Week 7)
# try:
# import streamlit as st
# except ImportError:
# print("⚠️ Streamlit not installed. Run `pip install streamlit` if needed.")
# 🧪 Hyperparameter tuning (Optional)
# try:
# import optuna
# except ImportError:
# print("⚠️ Optuna not installed. Run `pip install optuna` if needed.")
Using device: cuda
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm 2025-04-25 12:15:10.543079: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered 2025-04-25 12:15:10.543373: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2025-04-25 12:15:10.665028: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered 2025-04-25 12:15:10.911921: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 2025-04-25 12:15:12.336780: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
In [2]:
# Clean gpu
def clean_gpu():
    """Release cached CUDA memory and force a Python garbage-collection pass."""
    import gc  # local import: `gc` is not imported at the top of the notebook,
               # so this cell would raise NameError on a fresh Restart-&-Run-All
    torch.cuda.empty_cache()  # no-op when CUDA is unavailable/uninitialized
    gc.collect()
    print("🧹 GPU/CPU memory cleaned.")
In [3]:
# Data exploration and Visualization
In [4]:
# 📁 Load label metadata and filter for images_001
LABELS_CSV_PATH = "Data_Entry_2017.csv"
IMAGES_DIR = "images_001"
# Load the label CSV
df = pd.read_csv(LABELS_CSV_PATH)
# List image filenames in the directory
available_images = set(os.listdir(IMAGES_DIR))
# Filter rows where the image actually exists in images_001
df_images = df[df["Image Index"].isin(available_images)].copy()
print(f"Total images in '{IMAGES_DIR}': {len(df_images)}")
display(df_images.head())
Total images in 'images_001': 4999
| Image Index | Finding Labels | Follow-up # | Patient ID | Patient Age | Patient Gender | View Position | OriginalImage[Width | Height] | OriginalImagePixelSpacing[x | y] | Unnamed: 11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 00000001_000.png | Cardiomegaly | 0 | 1 | 58 | M | PA | 2682 | 2749 | 0.143 | 0.143 | NaN |
| 1 | 00000001_001.png | Cardiomegaly|Emphysema | 1 | 1 | 58 | M | PA | 2894 | 2729 | 0.143 | 0.143 | NaN |
| 2 | 00000001_002.png | Cardiomegaly|Effusion | 2 | 1 | 58 | M | PA | 2500 | 2048 | 0.168 | 0.168 | NaN |
| 3 | 00000002_000.png | No Finding | 0 | 2 | 81 | M | PA | 2500 | 2048 | 0.171 | 0.171 | NaN |
| 4 | 00000003_000.png | Hernia | 0 | 3 | 81 | F | PA | 2582 | 2991 | 0.143 | 0.143 | NaN |
In [5]:
# 🧾 Split 'Finding Labels' into lists (pipe-separated multi-labels)
df_images['Labels'] = df_images['Finding Labels'].fillna('').str.split('|')

# 📊 Count frequency of each individual label
from collections import Counter
all_labels = [label for labels in df_images['Labels'] for label in labels if label]
label_counts = Counter(all_labels)

# ✅ Convert to sorted DataFrame for plotting
label_df = pd.DataFrame.from_dict(label_counts, orient='index', columns=['Count']).sort_values(by='Count', ascending=False)

# 📈 Plot
plt.figure(figsize=(12, 6))
# Assign `hue` and disable the legend: passing `palette` without `hue` is
# deprecated in seaborn and will be removed in v0.14 (the FutureWarning this
# cell previously emitted).
sns.barplot(x=label_df.index, y=label_df['Count'], hue=label_df.index,
            palette='viridis', legend=False)
plt.title(' Frequency of Each Pathology in images_001')
plt.xticks(rotation=45, ha='right')
plt.ylabel("Image Count")
plt.xlabel("Pathology Label")
plt.tight_layout()
plt.show()
/tmp/ipykernel_17261/3784897274.py:14: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. sns.barplot(x=label_df.index, y=label_df['Count'], palette='viridis')
In [6]:
# 🔝 Top 5 most common pathologies
most_common = label_df.head(5)
# 🔙 Bottom 5 least common pathologies
least_common = label_df.tail(5)
# 🖨️ Print results
print("Most Common Pathologies:")
display(most_common)
print("\nLeast Common Pathologies:")
display(least_common)
Most Common Pathologies:
| Count | |
|---|---|
| No Finding | 2754 |
| Infiltration | 830 |
| Effusion | 487 |
| Atelectasis | 460 |
| Nodule | 214 |
Least Common Pathologies:
| Count | |
|---|---|
| Mass | 161 |
| Emphysema | 125 |
| Edema | 90 |
| Pneumonia | 65 |
| Hernia | 27 |
In [7]:
# 🔀 Randomly sample 6 images from df_images
sample_images = random.sample(list(df_images['Image Index']), 6)

# 🖼️ Plot images with labels
plt.figure(figsize=(15, 10))
for i, img_name in enumerate(sample_images):
    img_path = os.path.join(IMAGES_DIR, img_name)
    try:
        img = Image.open(img_path).convert("L")  # Grayscale
        labels = df_images[df_images["Image Index"] == img_name]["Finding Labels"].values[0]
        plt.subplot(2, 3, i + 1)
        plt.imshow(img, cmap="gray")
        # Fix: "\n" (the original "\\n" printed a literal backslash-n instead
        # of breaking the line). The stethoscope emoji is dropped — DejaVu
        # Sans has no glyph for it (see the UserWarning this cell emitted).
        plt.title(f"{img_name}\nLabels: {labels}", fontsize=10)
        plt.axis("off")
    except Exception as e:
        print(f"⚠️ Could not load {img_name}: {e}")

plt.tight_layout()
plt.suptitle("Random Chest X-rays with Pathology Labels", fontsize=16)
plt.subplots_adjust(top=0.9)  # leave headroom for the suptitle
plt.show()
/tmp/ipykernel_17261/362672737.py:21: UserWarning: Glyph 129658 (\N{STETHOSCOPE}) missing from font(s) DejaVu Sans.
plt.tight_layout()
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 129658 (\N{STETHOSCOPE}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
In [8]:
# 🧼 Clean 'Patient Age' column (handles any weird values like '411Y')
df_images['Age'] = pd.to_numeric(df_images['Patient Age'], errors='coerce')
# 🧹 Drop NaNs (non-convertible age entries)
df_images = df_images.dropna(subset=['Age'])
# 🔢 Convert to integer (safely)
df_images['Age'] = df_images['Age'].astype(int)
# 📈 Plot age distribution
plt.figure(figsize=(10, 5))
sns.histplot(df_images['Age'], bins=40, kde=True, color="steelblue")
plt.xlabel('Patient Age')
plt.ylabel('Count')
plt.title(' Age Distribution of Patients in images_001')
plt.grid(True)
plt.tight_layout()
plt.show()
In [9]:
# 🧼 Clean gender values (drop missing if any)
df_images = df_images.dropna(subset=["Patient Gender"])
gender_counts = df_images["Patient Gender"].value_counts()
# 📈 Plot pie chart
plt.figure(figsize=(6, 6))
colors = ["#66c2a5", "#fc8d62"] # Custom colors for Male/Female
explode = [0.05] * len(gender_counts) # Slightly explode each slice
plt.pie(
gender_counts,
labels=gender_counts.index,
autopct="%1.1f%%",
startangle=90,
colors=colors,
explode=explode,
shadow=True
)
plt.title("Gender Distribution of Patients")
plt.axis("equal") # Equal aspect ratio ensures pie is a circle.
plt.tight_layout()
plt.show()
In [10]:
# 🔐 Ensure all labels are lists and not NaN
df_images['Labels'] = df_images['Labels'].apply(lambda x: x if isinstance(x, list) else [])
# ✅ Binarize multi-labels for co-occurrence
mlb = MultiLabelBinarizer()
binary_labels = pd.DataFrame(mlb.fit_transform(df_images['Labels']), columns=mlb.classes_)
# 🧮 Compute correlation matrix
corr_matrix = binary_labels.corr()
# 📈 Plot heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', square=True, linewidths=0.5)
plt.title('Co-occurrence Correlation Between Pathologies')
plt.tight_layout()
plt.show()
In [11]:
# 🧊 Filter correlation matrix using threshold
threshold = 0.3
filtered_corr_matrix = corr_matrix.where(abs(corr_matrix) >= threshold)
# 🔥 Plot only strong correlations
plt.figure(figsize=(12, 8))
sns.heatmap(filtered_corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', linewidths=0.5)
plt.title(f'Filtered Correlation Matrix (Threshold ≥ {threshold})')
plt.tight_layout()
plt.show()
# ➕ Optional: Matrix with 0s for weak correlations
filtered_corr_matrix_zero = corr_matrix.mask(abs(corr_matrix) < threshold, 0)
display(filtered_corr_matrix_zero.round(2))
| Atelectasis | Cardiomegaly | Consolidation | Edema | Effusion | Emphysema | Fibrosis | Hernia | Infiltration | Mass | No Finding | Nodule | Pleural_Thickening | Pneumonia | Pneumothorax | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Atelectasis | 1.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | -0.35 | 0.0 | 0.0 | 0.0 | 0.0 |
| Cardiomegaly | 0.00 | 1.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 |
| Consolidation | 0.00 | 0.0 | 1.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 |
| Edema | 0.00 | 0.0 | 0.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 |
| Effusion | 0.00 | 0.0 | 0.0 | 0.0 | 1.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | -0.36 | 0.0 | 0.0 | 0.0 | 0.0 |
| Emphysema | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 1.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 |
| Fibrosis | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 1.0 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 |
| Hernia | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 1.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 |
| Infiltration | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 1.00 | 0.0 | -0.49 | 0.0 | 0.0 | 0.0 | 0.0 |
| Mass | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 1.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 |
| No Finding | -0.35 | 0.0 | 0.0 | 0.0 | -0.36 | 0.0 | 0.0 | 0.0 | -0.49 | 0.0 | 1.00 | 0.0 | 0.0 | 0.0 | 0.0 |
| Nodule | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.00 | 1.0 | 0.0 | 0.0 | 0.0 |
| Pleural_Thickening | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 1.0 | 0.0 | 0.0 |
| Pneumonia | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 1.0 | 0.0 |
| Pneumothorax | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 1.0 |
In [12]:
# 🔢 Count how many labels each image has
df_images['Num Labels'] = df_images['Labels'].apply(len)
# 📊 Plot: Distribution of label count per image
plt.figure(figsize=(8, 5))
ax = df_images['Num Labels'].value_counts().sort_index().plot(
kind='bar',
color='darkorange',
edgecolor='black'
)
# 🎯 Title and labels
plt.title('🔢 Number of Pathologies Per Image')
plt.xlabel('Number of Labels')
plt.ylabel('Image Count')
plt.grid(axis='y', linestyle='--', alpha=0.7)
# 📋 Add value labels on top of each bar
for i, val in enumerate(df_images['Num Labels'].value_counts().sort_index()):
plt.text(i, val + 2, str(val), ha='center', va='bottom', fontsize=9)
plt.tight_layout()
plt.show()
/tmp/ipykernel_17261/2392896313.py:22: UserWarning: Glyph 128290 (\N{INPUT SYMBOL FOR NUMBERS}) missing from font(s) DejaVu Sans.
plt.tight_layout()
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 128290 (\N{INPUT SYMBOL FOR NUMBERS}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
In [13]:
# 🧼 Ensure age is numeric and drop NaNs
df_images['Age'] = pd.to_numeric(df_images['Patient Age'], errors='coerce')
df_images = df_images.dropna(subset=['Age'])

# 🔎 Expand labels for multi-label visualization (one row per image-label pair)
exploded_df = df_images.explode('Labels')

# 📦 Drop rows with missing labels or gender
exploded_df = exploded_df.dropna(subset=['Labels', 'Patient Gender'])

# 📊 Boxplot: Age distribution by pathology
plt.figure(figsize=(14, 6))
# hue='Labels' + legend=False: passing `palette` without `hue` is deprecated
# in seaborn and will be removed in v0.14 (the FutureWarning this cell emitted).
sns.boxplot(data=exploded_df, x='Labels', y='Age', hue='Labels',
            palette="Set2", legend=False)
plt.xticks(rotation=45, ha='right')
plt.ylabel('Patient Age')
plt.title('Age Distribution by Pathology')  # emoji removed: missing-glyph warning
plt.grid(True, axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

# 📊 Stacked Bar Chart: Pathology frequency by gender
gender_pathology_counts = exploded_df.groupby(['Labels', 'Patient Gender']).size().unstack().fillna(0)

# Plot
gender_pathology_counts.plot(
    kind='bar',
    stacked=True,
    figsize=(14, 6),
    colormap='coolwarm',
    edgecolor='black'
)
plt.title('Pathology Distribution by Gender')  # emoji removed: missing-glyph warning
plt.ylabel('Image Count')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
/tmp/ipykernel_17261/2095468132.py:13: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.
sns.boxplot(data=exploded_df, x='Labels', y='Age', palette="Set2")
/tmp/ipykernel_17261/2095468132.py:18: UserWarning: Glyph 128202 (\N{BAR CHART}) missing from font(s) DejaVu Sans.
plt.tight_layout()
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 128202 (\N{BAR CHART}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
/tmp/ipykernel_17261/2095468132.py:36: UserWarning: Glyph 128101 (\N{BUSTS IN SILHOUETTE}) missing from font(s) DejaVu Sans.
plt.tight_layout()
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 128101 (\N{BUSTS IN SILHOUETTE}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
In [14]:
import pandas as pd
import os
# Define correct file paths from uploaded files
bbox_path = "BBox_List_2017.csv"
labels_path = "Data_Entry_2017.csv"
images_dir = "images_001" # must exist in your working directory
# Load the CSVs
bbox_df = pd.read_csv(bbox_path)
labels_df = pd.read_csv(labels_path)
# Get the list of image filenames in the 'images_001' folder
image_files = set(os.listdir(images_dir))
# Filter both label and bbox data for images in 'images_001'
filtered_labels_df = labels_df[labels_df["Image Index"].isin(image_files)].copy()
filtered_bbox_df = bbox_df[bbox_df["Image Index"].isin(image_files)].copy()
# Count how many images have bounding boxes in images_001
num_with_bbox = filtered_bbox_df["Image Index"].nunique()
print(f"✅ Total images in images_001: {len(image_files)}")
print(f"🧾 Filtered label entries: {len(filtered_labels_df)}")
print(f"📦 Filtered bbox entries: {len(filtered_bbox_df)}")
print(f"🎯 Unique images with bbox: {num_with_bbox}")
✅ Total images in images_001: 4999 🧾 Filtered label entries: 4999 📦 Filtered bbox entries: 39 🎯 Unique images with bbox: 37
In [15]:
# Checkpoint 1
In [16]:
import pickle
# Create a folder for checkpoints if not already
os.makedirs("checkpoints", exist_ok=True)
# Save filtered labels and bbox dataframes
with open("checkpoints/filtered_labels_df.pkl", "wb") as f:
pickle.dump(filtered_labels_df, f)
with open("checkpoints/filtered_bbox_df.pkl", "wb") as f:
pickle.dump(filtered_bbox_df, f)
print("✅ Checkpoints saved to 'checkpoints/' folder.")
✅ Checkpoints saved to 'checkpoints/' folder.
In [17]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import os
def show_image_with_bbox(image_name, labels_df, bbox_df, image_dir="images_001"):
    """Display a chest X-ray with pathology label and bounding box."""
    # Load the X-ray as a grayscale PIL image.
    image = Image.open(os.path.join(image_dir, image_name)).convert("L")

    # Look up the finding label(s); fall back to "Unknown" when absent.
    matches = labels_df[labels_df["Image Index"] == image_name]["Finding Labels"].values
    caption = matches[0] if len(matches) > 0 else "Unknown"

    # Render the image.
    fig, ax = plt.subplots(1, figsize=(8, 8))
    ax.imshow(image, cmap="gray")
    ax.set_title(f"{image_name}\nLabels: {caption}", fontsize=10)

    # Draw every bounding box recorded for this image.
    # Column names carry the CSV's odd bracketing: "Bbox [x", "y", "w", "h]".
    for _, box in bbox_df[bbox_df["Image Index"] == image_name].iterrows():
        ax.add_patch(patches.Rectangle(
            (box["Bbox [x"], box["y"]), box["w"], box["h]"],
            linewidth=2, edgecolor='red', facecolor='none'))

    plt.axis('off')
    plt.tight_layout()
    plt.show()
In [18]:
# Pick one image that has a bounding box
sample_bbox_image = filtered_bbox_df["Image Index"].iloc[0]
show_image_with_bbox(sample_bbox_image, filtered_labels_df, filtered_bbox_df)
In [19]:
def show_bbox_samples(bbox_df, labels_df, image_dir="images_001", n=3):
    """Display n randomly sampled images that have bounding boxes, with labels.

    Relies on the module-level imports (matplotlib, patches, PIL, os, random)
    from the top of the notebook — the previous in-function re-imports were
    redundant and hid the function's real dependencies.
    """
    # Unique images that have at least one bounding box.
    bbox_images = bbox_df["Image Index"].unique()
    sample_images = random.sample(list(bbox_images), min(n, len(bbox_images)))

    # One panel per sampled image.
    fig, axes = plt.subplots(1, n, figsize=(6 * n, 6))
    if n == 1:
        axes = [axes]  # keep iteration uniform for a single panel

    for i, image_name in enumerate(sample_images):
        img_path = os.path.join(image_dir, image_name)
        img = Image.open(img_path).convert("L")

        # First 'Finding Labels' entry for this image.
        label = labels_df[labels_df["Image Index"] == image_name]["Finding Labels"].values[0]

        ax = axes[i]
        ax.imshow(img, cmap="gray")
        # Emoji dropped from the title: DejaVu Sans has no stethoscope glyph
        # (see the UserWarning this cell previously emitted).
        ax.set_title(f"{image_name}\n{label}", fontsize=10)
        ax.axis("off")

        # Overlay every bounding box recorded for this image.
        # Column names come straight from BBox_List_2017.csv: "Bbox [x", "y", "w", "h]".
        bboxes = bbox_df[bbox_df["Image Index"] == image_name]
        for _, row in bboxes.iterrows():
            x, y, w, h = row["Bbox [x"], row["y"], row["w"], row["h]"]
            ax.add_patch(patches.Rectangle((x, y), w, h, linewidth=2,
                                           edgecolor='red', facecolor='none'))

    plt.tight_layout()
    plt.show()

show_bbox_samples(filtered_bbox_df, filtered_labels_df, image_dir="images_001", n=3)
/tmp/ipykernel_17261/3735723386.py:37: UserWarning: Glyph 129658 (\N{STETHOSCOPE}) missing from font(s) DejaVu Sans.
plt.tight_layout()
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 129658 (\N{STETHOSCOPE}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
In [20]:
# Week 2
In [21]:
#preprocess
In [22]:
from torchvision import transforms
# ✅ Define your preprocessing pipeline
resize = transforms.Resize((224, 224)) # Resize all images to 224x224
to_tensor = transforms.ToTensor() # Convert PIL to torch tensor [0, 1]
normalize = transforms.Normalize(mean=[0.5], std=[0.5]) # Normalize to [-1, 1]
# 👇 This will be used in a function to process each image later
In [23]:
# Apply Histogram equalization
In [24]:
import torch
def apply_histogram_equalization(tensor):
    """
    Apply histogram equalization to an image tensor with values in [0, 1].

    Accepts a single-channel [1, H, W] (or [H, W]) tensor; a 3-channel
    [3, H, W] tensor is first collapsed to grayscale by channel averaging.
    Returns a float tensor in [0, 1] with the same (grayscale) shape.
    """
    # Use the notebook-level `device` when it exists; otherwise stay on the
    # tensor's own device so the function also works standalone.
    try:
        target = device
    except NameError:
        target = tensor.device
    tensor = tensor.to(target)

    if tensor.dim() == 3 and tensor.shape[0] == 3:
        # Convert RGB → Grayscale (average over channels)
        tensor = torch.mean(tensor, dim=0, keepdim=True)

    # Map [0, 1] floats onto integer bins 0..255.
    scaled = (tensor * 255).clamp(0, 255).to(torch.long)
    flat = scaled.view(-1)

    # Histogram & cumulative distribution function.
    hist = torch.histc(flat.float(), bins=256, min=0, max=255)
    cdf = torch.cumsum(hist, dim=0)
    cdf_min = cdf[hist > 0].min() if (hist > 0).any() else cdf.new_tensor(0.0)

    # Guard against a constant image: there numel == cdf_min and the original
    # formula divided by zero (producing NaN/Inf). Clamping the denominator
    # maps a constant image to all-zeros instead.
    denom = (flat.numel() - cdf_min).clamp(min=1.0)
    cdf_normalized = ((cdf - cdf_min) / denom).clamp(0, 1)

    # Look up each pixel's equalized intensity via the normalized CDF.
    equalized = cdf_normalized[flat].view(tensor.shape)
    return equalized.float()  # Still in [0, 1]
In [25]:
# Apply and visualise on 4 samples
In [26]:
import matplotlib.pyplot as plt
from PIL import Image
import os
import torch
# 🔁 Sample and process 4 images
sample_images = list(filtered_labels_df["Image Index"])[:4]
processed_images = []
for i, img_name in enumerate(sample_images):
img_path = os.path.join("images_001", img_name)
image = Image.open(img_path).convert("RGB")
# Step-by-step preprocessing (on CPU)
original_tensor = to_tensor(image)
resized_tensor = resize(original_tensor)
equalized_tensor = apply_histogram_equalization(resized_tensor.cpu())
normalized_tensor = normalize(equalized_tensor)
# ✅ Plot: Image + Histogram (side-by-side)
fig, axes = plt.subplots(2, 2, figsize=(10, 6))
# Resized image
axes[0, 0].imshow(resized_tensor.permute(1, 2, 0).cpu().numpy())
axes[0, 0].set_title("Resized")
axes[0, 0].axis("off")
# Histogram of resized
axes[1, 0].hist(resized_tensor.cpu().view(-1).numpy(), bins=256, range=(0, 1), color='gray')
axes[1, 0].set_title("Resized Histogram")
# Equalized image
axes[0, 1].imshow(equalized_tensor.squeeze().cpu().numpy(), cmap="gray")
axes[0, 1].set_title("Equalized")
axes[0, 1].axis("off")
# Histogram of equalized
axes[1, 1].hist(equalized_tensor.cpu().view(-1).numpy(), bins=256, range=(0, 1), color='black')
axes[1, 1].set_title("Equalized Histogram")
plt.suptitle(f"🩻 {img_name}", fontsize=12)
plt.tight_layout()
plt.show()
processed_images.append(normalized_tensor)
# Final batch tensor (still CPU-based)
processed_images = torch.stack(processed_images)
print(f"✅ Sample batch shape: {processed_images.shape}")
/tmp/ipykernel_17261/222232096.py:42: UserWarning: Glyph 129659 (\U0001fa7b) missing from font(s) DejaVu Sans. plt.tight_layout() /home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 129659 (\U0001fa7b) missing from font(s) DejaVu Sans. fig.canvas.print_figure(bytes_io, **kw)
✅ Sample batch shape: torch.Size([4, 1, 224, 224])
In [27]:
from tqdm import tqdm
from PIL import Image
import torch
import gc
# Containers for the preprocessed tensors and their filenames.
# The two lists are kept index-aligned: an image is appended to both or neither.
processed_images = []
image_names = []

for img_name in tqdm(filtered_labels_df["Image Index"], desc="🚀 GPU-First Preprocessing"):
    img_path = os.path.join("images_001", img_name)
    try:
        image = Image.open(img_path).convert("RGB")
        # ⚡ Attempt full pipeline on GPU first
        tensor = to_tensor(image).to(device)  # Convert + Send to GPU
        tensor = resize(tensor)
        tensor = apply_histogram_equalization(tensor)  # Run EQ on GPU
        tensor = normalize(tensor)
        processed_images.append(tensor.cpu())  # Move back to CPU to store
        image_names.append(img_name)
    except RuntimeError as e:
        # CUDA OOM surfaces as a RuntimeError whose message contains
        # "out of memory"; any other RuntimeError is just reported.
        if "out of memory" in str(e):
            print(f"⚠️ OOM on {img_name} — retrying on CPU")
            torch.cuda.empty_cache()
            gc.collect()
            try:
                # Retry on CPU
                # NOTE(review): apply_histogram_equalization internally moves
                # its input to the global `device` (see its definition), so
                # this "CPU retry" may re-enter the GPU — confirm intended.
                tensor = to_tensor(image)
                tensor = resize(tensor)
                tensor = apply_histogram_equalization(tensor.cpu())
                tensor = normalize(tensor)
                processed_images.append(tensor)
                image_names.append(img_name)
            except Exception as cpu_e:
                print(f"❌ Failed on CPU too: {img_name} | {cpu_e}")
        else:
            print(f"❌ Runtime error: {img_name} | {e}")
    except Exception as general_e:
        # Covers unreadable/corrupt files and any non-Runtime failure.
        print(f"❌ Error processing {img_name}: {general_e}")
    # 💾 Cleanup after each image
    torch.cuda.empty_cache()
    gc.collect()

# ✅ Stack into one [N, 1, 224, 224] tensor (all elements are on CPU here).
processed_images_tensor = torch.stack(processed_images)
torch.cuda.empty_cache()
gc.collect()
print(f"\n✅ Final shape: {processed_images_tensor.shape}")
print(f"📍 Stored on: {processed_images_tensor.device} (should be 'cpu')")
🚀 GPU-First Preprocessing: 100%|███████████| 4999/4999 [15:57<00:00, 5.22it/s]
✅ Final shape: torch.Size([4999, 1, 224, 224]) 📍 Stored on: cpu (should be 'cpu')
In [28]:
#checkpoint 2
In [29]:
import pickle
import os
import torch
# 📁 Ensure checkpoint folder exists
os.makedirs("checkpoints", exist_ok=True)
# 💾 Save processed image tensor
torch.save(processed_images_tensor, "checkpoints/processed_images_tensor.pt")
# 💾 Save image name list
with open("checkpoints/image_names.pkl", "wb") as f:
pickle.dump(image_names, f)
print("Processed image tensor and filenames saved.")
clean_gpu()
Processed image tensor and filenames saved. 🧹 GPU/CPU memory cleaned.
In [30]:
#load checkpoint
In [31]:
# 🔁 Load image tensor. weights_only=True is safe for plain tensors and
# silences the torch.load FutureWarning about unpickling arbitrary objects.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt", weights_only=True)

# 🔁 Load image names (a plain Python list, so pickle is appropriate here)
with open("checkpoints/image_names.pkl", "rb") as f:
    image_names = pickle.load(f)

print("✅ Checkpoints reloaded.")
/tmp/ipykernel_17261/1072617477.py:2: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
✅ Checkpoints reloaded.
In [32]:
# Check shape of final tensor (should be [4999, 1, 224, 224])
print(f"✅ Shape of processed dataset: {processed_images_tensor.shape}")
# Sanity check: dtype and device
print(f"Tensor dtype: {processed_images_tensor.dtype}")
print(f"Stored on device: {processed_images_tensor.device}")
✅ Shape of processed dataset: torch.Size([4999, 1, 224, 224]) Tensor dtype: torch.float32 Stored on device: cpu
In [33]:
#splitting
In [34]:
from sklearn.model_selection import train_test_split
# 🧮 Total number of processed images
indices = list(range(len(image_names)))
# 🔁 Use fixed seed for reproducibility
train_idx, temp_idx = train_test_split(indices, test_size=0.3, random_state=42) # 70% train
val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=42) # 15% val/test
# 🗃️ Print sizes
print(f"Train set size: {len(train_idx)}")
print(f"Validation set size: {len(val_idx)}")
print(f"Test set size: {len(test_idx)}")
Train set size: 3499 Validation set size: 750 Test set size: 750
In [35]:
# 📦 Slice the preprocessed image tensor
train_tensor = processed_images_tensor[train_idx]
val_tensor = processed_images_tensor[val_idx]
test_tensor = processed_images_tensor[test_idx]
# 🧾 Print shapes
print(f"Train tensor shape: {train_tensor.shape}")
print(f"Validation tensor shape: {val_tensor.shape}")
print(f"Test tensor shape: {test_tensor.shape}")
Train tensor shape: torch.Size([3499, 1, 224, 224]) Validation tensor shape: torch.Size([750, 1, 224, 224]) Test tensor shape: torch.Size([750, 1, 224, 224])
In [36]:
# checkpoint 3
In [37]:
import torch
import os

# 📁 Ensure checkpoint directory exists
os.makedirs("checkpoints", exist_ok=True)

# 💾 Save train/val/test tensors
torch.save(train_tensor, "checkpoints/train_tensor.pt")
torch.save(val_tensor, "checkpoints/val_tensor.pt")
torch.save(test_tensor, "checkpoints/test_tensor.pt")

# ♻️ Reuse the clean_gpu() helper defined near the top of the notebook.
# (The previous redefinition here silently shadowed the original.)
clean_gpu()
print("✅ Tensor splits saved with GPU cleanup.")

# 🔁 Reload to verify the round-trip. weights_only=True is safe for plain
# tensors and silences the torch.load FutureWarning.
train_tensor = torch.load("checkpoints/train_tensor.pt", weights_only=True)
val_tensor = torch.load("checkpoints/val_tensor.pt", weights_only=True)
test_tensor = torch.load("checkpoints/test_tensor.pt", weights_only=True)
print("✅ Loaded tensor splits from checkpoint.")
✅ Tensor splits saved with GPU cleanup.
/tmp/ipykernel_17261/3098558247.py:23: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
train_tensor = torch.load("checkpoints/train_tensor.pt")
✅ Loaded tensor splits from checkpoint.
/tmp/ipykernel_17261/3098558247.py:24: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
val_tensor = torch.load("checkpoints/val_tensor.pt")
/tmp/ipykernel_17261/3098558247.py:25: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
test_tensor = torch.load("checkpoints/test_tensor.pt")
In [38]:
# traditional Features
In [39]:
import cv2
import numpy as np
from skimage.feature import local_binary_pattern, hog
import matplotlib.pyplot as plt
def extract_traditional_features(tensor_img):
    """
    Visualize classic feature extractors (Canny, LBP, HOG) for one image.

    Input: torch.Tensor of shape [1, H, W] or [H, W], assumed to be on CPU
    (or movable to it) with values in [0, 1]. Shows a 1x4 panel:
    original, Canny edges, LBP texture map, HOG rendering.
    """
    # Drop a leading channel dim if present, then move to a numpy array.
    if tensor_img.dim() == 3:
        array = tensor_img.squeeze().cpu().numpy()
    else:
        array = tensor_img.cpu().numpy()

    # OpenCV expects 8-bit images; rescale [0, 1] floats to [0, 255].
    gray_u8 = (array * 255).astype(np.uint8)

    # Edge, texture, and gradient-shape representations.
    edges = cv2.Canny(gray_u8, 100, 200)
    texture = local_binary_pattern(gray_u8, P=8, R=1, method='uniform')
    _, hog_render = hog(gray_u8, pixels_per_cell=(16, 16),
                        cells_per_block=(2, 2), orientations=9,
                        visualize=True)

    # Side-by-side panels, all grayscale, no axes.
    panels = [
        (gray_u8, "Original"),
        (edges, "Canny Edges"),
        (texture, "Local Binary Pattern"),
        (hog_render, "HOG Features"),
    ]
    fig, axes = plt.subplots(1, 4, figsize=(16, 4))
    for ax, (panel_img, panel_title) in zip(axes, panels):
        ax.imshow(panel_img, cmap='gray')
        ax.set_title(panel_title)
        ax.axis('off')
    plt.tight_layout()
    plt.show()
In [40]:
# Run on the first image in the training set
extract_traditional_features(train_tensor[0])
In [41]:
def extract_traditional_features_batch(tensor_batch):
    """
    Extract Canny, LBP, and HOG features from a batch of image tensors.

    Args:
        tensor_batch: iterable of image tensors, each [1, H, W] or [H, W],
            with float values assumed in [0, 1] (scaled to uint8 internally).

    Returns:
        List of 1D numpy feature vectors (Canny + LBP + HOG concatenated),
        one per image.
    """
    feature_list = []
    # NOTE(review): relies on `tqdm` being imported in the session — it is
    # imported in a later cell of this notebook; fragile under Restart & Run All.
    for tensor_img in tqdm(tensor_batch, desc="🔎 Extracting traditional features"):
        # Make sure the tensor is on CPU and numpy-friendly
        img = tensor_img.squeeze().cpu().numpy()
        img_uint8 = (img * 255).astype(np.uint8)
        # --- Canny edge map, flattened to 1D ---
        canny = cv2.Canny(img_uint8, 100, 200).flatten()
        # --- LBP texture codes, flattened to 1D ---
        lbp = local_binary_pattern(img_uint8, P=8, R=1, method='uniform').flatten()
        # --- HOG descriptor ---
        # visualize=False: the previous code computed a HOG rendering image and
        # discarded it for every image — skipping it avoids pointless work.
        hog_features = hog(img_uint8, pixels_per_cell=(16, 16),
                           cells_per_block=(2, 2), orientations=9,
                           visualize=False, feature_vector=True)
        # Concatenate all features into one long vector
        combined = np.concatenate([canny, lbp, hog_features])
        feature_list.append(combined)
    return feature_list
In [42]:
# Extract handcrafted features for all splits.
# NOTE(review): depends on train_tensor / val_tensor / test_tensor loaded in
# earlier cells — confirm they exist under Restart & Run All.
train_traditional_features = extract_traditional_features_batch(train_tensor)
val_traditional_features = extract_traditional_features_batch(val_tensor)
test_traditional_features = extract_traditional_features_batch(test_tensor)
# ✅ Check shapes
print(f"Train set: {len(train_traditional_features)} features")
print(f"Feature vector length (1st image): {len(train_traditional_features[0])}")
🔎 Extracting traditional features: 100%|███| 3499/3499 [03:02<00:00, 19.20it/s] 🔎 Extracting traditional features: 100%|█████| 750/750 [00:38<00:00, 19.59it/s] 🔎 Extracting traditional features: 100%|█████| 750/750 [00:38<00:00, 19.29it/s]
Train set: 3499 features Feature vector length (1st image): 106436
In [43]:
# Checkpoint 4: persist the traditional (handcrafted) features
In [44]:
import os
import pickle
import gc

# 💾 Persist the handcrafted feature lists so later sessions can skip
# the ~4-minute re-extraction (checkpoint 4).
feature_dir = "checkpoints/traditional_features"
os.makedirs(feature_dir, exist_ok=True)

features_by_split = {
    "train": train_traditional_features,
    "val": val_traditional_features,
    "test": test_traditional_features,
}
for split_name, features in features_by_split.items():
    out_path = os.path.join(feature_dir, f"{split_name}_traditional_features.pkl")
    with open(out_path, "wb") as f:
        pickle.dump(features, f)

clean_gpu()
print("✅ Traditional features checkpointed and memory cleaned.")
✅ Traditional features checkpointed and memory cleaned.
In [45]:
#load
In [46]:
import os
import pickle
import gc

# 🔁 Restore the handcrafted feature lists saved at checkpoint 4.
_feature_dir = "checkpoints/traditional_features"

def _load_split_features(split_name):
    # One pickle file per split, written by the save cell above.
    with open(os.path.join(_feature_dir, f"{split_name}_traditional_features.pkl"), "rb") as f:
        return pickle.load(f)

train_traditional_features = _load_split_features("train")
val_traditional_features = _load_split_features("val")
test_traditional_features = _load_split_features("test")
print("✅ Reloaded traditional features from checkpoint.")
✅ Reloaded traditional features from checkpoint.
In [47]:
# Pca , Tsne and Umap
In [48]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# ✂️ Use a subset for crash-safe visualizations (t-SNE/UMAP scale poorly with n)
sample_count = 500
X_sample = np.array(train_traditional_features[:sample_count])
# First listed finding is used as a single pseudo-label for coloring/scoring.
y_sample = [label.split('|')[0] for label in filtered_labels_df.iloc[train_idx[:sample_count]]["Finding Labels"]]
# Wrap in pd.Series: factorize on a bare list is deprecated and will raise
# in a future pandas version (FutureWarning previously emitted here).
y_encoded = pd.factorize(pd.Series(y_sample))[0]

# 📈 Run PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_sample)
pca_variance = np.sum(pca.explained_variance_ratio_)
pca_silhouette = silhouette_score(X_pca, y_encoded)

# 🌌 Run t-SNE — max_iter replaces n_iter (renamed in scikit-learn 1.5,
# removed in 1.7; the old name triggered a FutureWarning here).
tsne = TSNE(n_components=2, perplexity=30, random_state=42, max_iter=1000)
X_tsne = tsne.fit_transform(X_sample)
tsne_silhouette = silhouette_score(X_tsne, y_encoded)

# 🧭 Run UMAP (random_state makes the run deterministic but single-threaded)
reducer = umap.UMAP(n_components=2, random_state=42)
X_umap = reducer.fit_transform(X_sample)
umap_silhouette = silhouette_score(X_umap, y_encoded)

# 📊 Plot all 3
fig, axs = plt.subplots(1, 3, figsize=(18, 5))
axs[0].scatter(X_pca[:, 0], X_pca[:, 1], c=y_encoded, cmap='tab10', edgecolor='k', alpha=0.7)
axs[0].set_title(f"PCA\nVariance: {pca_variance:.2f} | Silhouette: {pca_silhouette:.3f}")
axs[1].scatter(X_tsne[:, 0], X_tsne[:, 1], c=y_encoded, cmap='tab10', edgecolor='k', alpha=0.7)
axs[1].set_title(f"t-SNE\nSilhouette: {tsne_silhouette:.3f}")
axs[2].scatter(X_umap[:, 0], X_umap[:, 1], c=y_encoded, cmap='tab10', edgecolor='k', alpha=0.7)
axs[2].set_title(f"UMAP\nSilhouette: {umap_silhouette:.3f}")
for ax in axs:
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel('')
    ax.set_ylabel('')
plt.suptitle("🌐 Dimensionality Reduction: PCA vs t-SNE vs UMAP", fontsize=14)
plt.tight_layout()
plt.show()
/tmp/ipykernel_17261/3069774393.py:13: FutureWarning: factorize with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.
y_encoded = pd.factorize(y_sample)[0]
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/sklearn/manifold/_t_sne.py:1164: FutureWarning: 'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7.
warnings.warn(
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
warnings.warn(
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
warn(
/tmp/ipykernel_17261/3069774393.py:50: UserWarning: Glyph 127760 (\N{GLOBE WITH MERIDIANS}) missing from font(s) DejaVu Sans.
plt.tight_layout()
/home/kingpin/anaconda3/envs/mldl/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 127760 (\N{GLOBE WITH MERIDIANS}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
In [49]:
#CNN extraction
In [ ]:
#define Resnet
In [51]:
import torchvision.models as models
import torch.nn as nn
from torchvision.models import ResNet18_Weights

# ⚙️ Load pre-trained ResNet18 and strip the final FC layer so it acts as a
# fixed feature extractor: per-image output becomes (512, 1, 1) after pooling.
resnet = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
resnet = nn.Sequential(*list(resnet.children())[:-1])  # remove final FC → output: (B, 512, 1, 1)
# 🎯 Move to available device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet = resnet.to(device)
resnet.eval()  # inference mode: fixes batch-norm statistics
print("✅ ResNet18 loaded on:", device)
✅ ResNet18 loaded on: cuda
In [52]:
from tqdm import tqdm
import torch
import gc
import os
# 📦 Create folder if needed
os.makedirs("checkpoints/cnn_features", exist_ok=True)
# ♻️ Cleanup
def clean_gpu():
    """Run a Python garbage-collection pass and free cached CUDA blocks."""
    gc.collect()
    torch.cuda.empty_cache()
# ⚙️ CNN feature extractor (batched, device-safe)
def extract_cnn_features_batched(tensor_batch, model, batch_size=16,
                                 desc="🧠 CNN Feature Extraction",
                                 device=None):
    """
    Run `model` over `tensor_batch` in mini-batches and return pooled features.

    Args:
        tensor_batch: float tensor [N, 1, H, W] of grayscale images.
        model: feature extractor whose output is (B, C, 1, 1) per batch.
        batch_size: mini-batch size (GPU-memory safety valve).
        desc: tqdm progress-bar label. Previously hard-coded to
            "🧠 CNN Feature Extraction (train)", which mislabeled the
            val/test runs; pass a split-specific label if desired.
        device: target device; when None it is inferred from the model's
            parameters (matches the previous reliance on the global `device`).

    Returns:
        torch.Tensor [N, C] of features on CPU.
    """
    if device is None:
        device = next(model.parameters()).device
    features = []
    model.eval()
    for i in tqdm(range(0, len(tensor_batch), batch_size), desc=desc):
        batch = tensor_batch[i:i+batch_size]
        # Grayscale → RGB by channel replication: [B, 1, H, W] → [B, 3, H, W]
        batch_rgb = batch.repeat(1, 3, 1, 1).to(device)
        with torch.no_grad():
            out = model(batch_rgb)           # (B, C, 1, 1)
            out = out.view(out.size(0), -1)  # flatten to (B, C)
        features.append(out.cpu())
        # Drop per-batch GPU tensors eagerly to keep memory flat
        del batch, batch_rgb, out
        clean_gpu()
    return torch.cat(features, dim=0)
# 🚀 Extract CNN features for the train split and checkpoint them
train_cnn_features = extract_cnn_features_batched(train_tensor, resnet)
# 💾 Save checkpoint
torch.save(train_cnn_features, "checkpoints/cnn_features/train_cnn_features.pt")
clean_gpu()
print("✅ Saved CNN features for train set!")
🧠 CNN Feature Extraction (train): 100%|██████| 219/219 [00:44<00:00, 4.87it/s]
✅ Saved CNN features for train set!
In [53]:
# 🚀 Run for validation set
# NOTE(review): the progress bar still says "(train)" — the desc is
# hard-coded inside extract_cnn_features_batched.
val_cnn_features = extract_cnn_features_batched(val_tensor, resnet)
# 💾 Save checkpoint
torch.save(val_cnn_features, "checkpoints/cnn_features/val_cnn_features.pt")
clean_gpu()
print("✅ Saved CNN features for validation set!")
🧠 CNN Feature Extraction (train): 100%|████████| 47/47 [00:10<00:00, 4.62it/s]
✅ Saved CNN features for validation set!
In [54]:
# 🚀 Run for test set
# NOTE(review): the progress bar still says "(train)" — the desc is
# hard-coded inside extract_cnn_features_batched.
test_cnn_features = extract_cnn_features_batched(test_tensor, resnet)
# 💾 Save checkpoint
torch.save(test_cnn_features, "checkpoints/cnn_features/test_cnn_features.pt")
clean_gpu()
print("✅ Saved CNN features for test set!")
🧠 CNN Feature Extraction (train): 100%|████████| 47/47 [00:09<00:00, 4.81it/s]
✅ Saved CNN features for test set!
In [55]:
import os
import torch
import gc

def save_cnn_features_all(train_feat, val_feat, test_feat):
    """Checkpoint the three CNN feature tensors under checkpoints/cnn_features/."""
    out_dir = "checkpoints/cnn_features"
    os.makedirs(out_dir, exist_ok=True)
    named_feats = (("train", train_feat), ("val", val_feat), ("test", test_feat))
    for split_name, feat in named_feats:
        torch.save(feat, os.path.join(out_dir, f"{split_name}_cnn_features.pt"))
    clean_gpu()
    print("✅ All CNN features checkpointed:")
    for split_name, _ in named_feats:
        print(f" • {split_name}_cnn_features.pt")

save_cnn_features_all(train_cnn_features, val_cnn_features, test_cnn_features)
clean_gpu()
✅ All CNN features checkpointed: • train_cnn_features.pt • val_cnn_features.pt • test_cnn_features.pt
In [56]:
import os
import torch

def load_cnn_features_all():
    """
    Reload the train/val/test CNN feature tensors from
    checkpoints/cnn_features/, print their shapes, and return them.

    Returns:
        (train_feat, val_feat, test_feat): torch.Tensor triple.
    """
    base_path = "checkpoints/cnn_features"
    # weights_only=True: these files hold plain tensors, so the restricted
    # loader suffices — avoids the arbitrary-code pickle path and silences
    # the FutureWarning torch.load emitted here before.
    train_feat = torch.load(os.path.join(base_path, "train_cnn_features.pt"), weights_only=True)
    val_feat = torch.load(os.path.join(base_path, "val_cnn_features.pt"), weights_only=True)
    test_feat = torch.load(os.path.join(base_path, "test_cnn_features.pt"), weights_only=True)
    print("✅ CNN features loaded:")
    print(f" • train: {train_feat.shape}")
    print(f" • val: {val_feat.shape}")
    print(f" • test: {test_feat.shape}")
    return train_feat, val_feat, test_feat

# 🔁 Load when needed
train_cnn_features, val_cnn_features, test_cnn_features = load_cnn_features_all()
✅ CNN features loaded: • train: torch.Size([3499, 512]) • val: torch.Size([750, 512]) • test: torch.Size([750, 512])
/tmp/ipykernel_17261/3270195733.py:7: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. train_feat = torch.load(os.path.join(base_path, "train_cnn_features.pt")) /tmp/ipykernel_17261/3270195733.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
val_feat = torch.load(os.path.join(base_path, "val_cnn_features.pt")) /tmp/ipykernel_17261/3270195733.py:9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. test_feat = torch.load(os.path.join(base_path, "test_cnn_features.pt"))
In [ ]:
# Preparation for hybrid features
In [57]:
import numpy as np
import os
import gc

def fuse_and_save_hybrid_features(
    train_trad, val_trad, test_trad,
    train_cnn, val_cnn, test_cnn
):
    """
    Column-wise fuse handcrafted and CNN features per split and save each
    fused matrix as a .npy file under checkpoints/hybrid_features/.
    """
    out_dir = "checkpoints/hybrid_features"
    os.makedirs(out_dir, exist_ok=True)

    # 🔗 [traditional | cnn] per image; CNN tensors converted to numpy first
    fused = {
        "train": np.concatenate([train_trad, train_cnn.numpy()], axis=1),
        "val": np.concatenate([val_trad, val_cnn.numpy()], axis=1),
        "test": np.concatenate([test_trad, test_cnn.numpy()], axis=1),
    }

    # 💾 One .npy per split
    for split_name, matrix in fused.items():
        np.save(os.path.join(out_dir, f"hybrid_{split_name}.npy"), matrix)

    clean_gpu()
    print("✅ Hybrid features saved:")
    print(f" • train: {fused['train'].shape}")
    print(f" • val: {fused['val'].shape}")
    print(f" • test: {fused['test'].shape}")
In [58]:
# Fuse handcrafted + CNN features for every split and write them to disk.
fuse_and_save_hybrid_features(
    train_traditional_features, val_traditional_features, test_traditional_features,
    train_cnn_features, val_cnn_features, test_cnn_features
)
✅ Hybrid features saved: • train: (3499, 106948) • val: (750, 106948) • test: (750, 106948)
In [ ]:
# Self-contained hybrid feature loader + labels
In [60]:
import numpy as np
import pickle
from sklearn.preprocessing import MultiLabelBinarizer

# 📦 Load hybrid (handcrafted + CNN) feature matrices saved earlier
hybrid_train = np.load("checkpoints/hybrid_features/hybrid_train.npy")
hybrid_val = np.load("checkpoints/hybrid_features/hybrid_val.npy")
hybrid_test = np.load("checkpoints/hybrid_features/hybrid_test.npy")

# 🧾 Load filtered labels DataFrame
# NOTE(review): pickle.load executes arbitrary code for untrusted files —
# fine for our own checkpoints, never for third-party data.
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)

# 🏷️ Create multi-hot target vectors ("A|B" strings → 0/1 vector per image).
# sorted() makes the class column order deterministic across sessions.
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))

# 📊 Split targets using the saved index splits
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
train_idx, val_idx, test_idx = splits["train_idx"], splits["val_idx"], splits["test_idx"]
y_train = targets[train_idx]
y_val = targets[val_idx]
y_test = targets[test_idx]

print("✅ Loaded hybrid features and binarized targets:")
print(" • hybrid_train:", hybrid_train.shape)
print(" • y_train:", y_train.shape)
✅ Loaded hybrid features and binarized targets: • hybrid_train: (3499, 106948) • y_train: (3499, 15)
In [ ]:
# MLP classifier training block (loaded from last checkpoint)
In [61]:
import os
import gc
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
# ⚙️ MLP architecture (modular)
class MLPClassifier(nn.Module):
    """
    Two-hidden-layer MLP head for multi-label classification.

    Emits raw logits (no sigmoid) — pair with nn.BCEWithLogitsLoss.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_1, hidden_2, drop_p = 1024, 512, 0.3
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_1),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(hidden_1, hidden_2),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(hidden_2, output_dim),
        )

    def forward(self, x):
        """Return logits of shape (batch, output_dim)."""
        return self.net(x)
# 📦 Load hybrid features
# NOTE(review): this cell relies on `np`, `pickle`, and `clean_gpu` from
# earlier cells even though it is billed as self-contained — confirm it
# survives Restart & Run All.
hybrid_train = np.load("checkpoints/hybrid_features/hybrid_train.npy")
hybrid_val = np.load("checkpoints/hybrid_features/hybrid_val.npy")

# 🧾 Load labels and rebuild the deterministic class ordering
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
train_idx, val_idx = splits["train_idx"], splits["val_idx"]
y_train = targets[train_idx]
y_val = targets[val_idx]

# 🧠 Torch datasets
# NOTE(review): features appear unscaled (first-epoch loss in the output is
# ~13) — consider standardizing before training.
X_train_tensor = torch.tensor(hybrid_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(hybrid_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val_tensor, y_val_tensor), batch_size=64, shuffle=False)

# 🎯 Train config — BCEWithLogitsLoss expects the model's raw logits
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPClassifier(input_dim=X_train_tensor.shape[1], output_dim=y_train_tensor.shape[1]).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 🚀 Training loop: one optimizer step per mini-batch, full val pass per epoch
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs} | 🔧 Train Loss: {avg_loss:.4f}")
    # 🧪 Validation without gradient tracking
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            out = model(xb)
            val_loss += criterion(out, yb).item()
    val_loss /= len(val_loader)
    print(f" ↳ 🧪 Val Loss: {val_loss:.4f}")

# 💾 Save model weights (state_dict only — architecture must be re-declared
# before loading)
os.makedirs("checkpoints/models", exist_ok=True)
torch.save(model.state_dict(), "checkpoints/models/mlp_classifier.pt")
clean_gpu()
print("✅ MLP classifier trained & saved to checkpoints/models/mlp_classifier.pt")
Epoch 1/10 | 🔧 Train Loss: 13.0477
↳ 🧪 Val Loss: 0.2759
Epoch 2/10 | 🔧 Train Loss: 0.5969
↳ 🧪 Val Loss: 0.2792
Epoch 3/10 | 🔧 Train Loss: 0.5488
↳ 🧪 Val Loss: 0.3209
Epoch 4/10 | 🔧 Train Loss: 0.5355
↳ 🧪 Val Loss: 0.2896
Epoch 5/10 | 🔧 Train Loss: 0.5429
↳ 🧪 Val Loss: 0.4077
Epoch 6/10 | 🔧 Train Loss: 0.4455
↳ 🧪 Val Loss: 0.2708
Epoch 7/10 | 🔧 Train Loss: 0.3699
↳ 🧪 Val Loss: 0.2785
Epoch 8/10 | 🔧 Train Loss: 0.3033
↳ 🧪 Val Loss: 0.2482
Epoch 9/10 | 🔧 Train Loss: 0.2708
↳ 🧪 Val Loss: 0.2378
Epoch 10/10 | 🔧 Train Loss: 0.2890
↳ 🧪 Val Loss: 0.2203
✅ MLP classifier trained & saved to checkpoints/models/mlp_classifier.pt
In [ ]:
# evaluation block for Mlp (self contained)
In [62]:
import torch
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score, hamming_loss, classification_report
import json
def clean_gpu():
    """Free cached CUDA memory and garbage-collect.

    Duplicate of the earlier definition so this evaluation cell runs
    standalone; note it still relies on `gc` imported in a previous cell.
    """
    torch.cuda.empty_cache()
    gc.collect()
def evaluate_model(model, X, y_true, device, name="test"):
    """
    Score a trained multi-label classifier on one split and save predictions.

    Args:
        model: torch module producing logits of shape (N, n_classes).
        X: feature matrix (numpy-like, float-convertible). The whole split is
            moved to `device` in one shot — assumes it fits in device memory.
        y_true: multi-hot ground-truth array of shape (N, n_classes).
        device: torch device for inference.
        name: split tag used in metric keys and saved filenames.

    Returns:
        dict with macro/micro F1, Hamming loss, and macro ROC-AUC for the split.
    """
    model.eval()
    with torch.no_grad():
        X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
        logits = model(X_tensor)
        probs = torch.sigmoid(logits).cpu().numpy()
    # Fixed 0.5 threshold per class for hard predictions
    preds = (probs >= 0.5).astype(int)
    # 📊 Metrics
    # NOTE(review): multi_class='ovr' is a multiclass option — for multi-hot
    # (multilabel) y_true it is likely ignored by roc_auc_score; confirm intent.
    metrics = {
        f"{name}_macro_f1": f1_score(y_true, preds, average="macro", zero_division=0),
        f"{name}_micro_f1": f1_score(y_true, preds, average="micro", zero_division=0),
        f"{name}_hamming_loss": hamming_loss(y_true, preds),
        f"{name}_macro_auc": roc_auc_score(y_true, probs, average="macro", multi_class='ovr')
    }
    print(f"\n📈 {name.upper()} METRICS:")
    for k, v in metrics.items():
        print(f"• {k}: {v:.4f}")
    # 💾 Save preds (relies on `os` imported in an earlier cell)
    os.makedirs("checkpoints/preds", exist_ok=True)
    np.save(f"checkpoints/preds/{name}_probs.npy", probs)
    np.save(f"checkpoints/preds/{name}_preds.npy", preds)
    return metrics
# 📦 Load test and val data
hybrid_test = np.load("checkpoints/hybrid_features/hybrid_test.npy")
hybrid_val = np.load("checkpoints/hybrid_features/hybrid_val.npy")
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)

# 🏷️ Rebuild the same sorted class ordering used at training time
from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
y_test = targets[splits["test_idx"]]
y_val = targets[splits["val_idx"]]

# 🔁 Reload trained model (architecture must match the saved state_dict)
input_dim = hybrid_test.shape[1]
output_dim = y_test.shape[1]
model = MLPClassifier(input_dim, output_dim).to(device)
# NOTE(review): consider torch.load(..., weights_only=True) — the file is a
# plain state_dict and the current call triggers a pickle FutureWarning.
model.load_state_dict(torch.load("checkpoints/models/mlp_classifier.pt"))
model.eval()

# ✅ Evaluate on test and val
test_metrics = evaluate_model(model, hybrid_test, y_test, device, name="test")
val_metrics = evaluate_model(model, hybrid_val, y_val, device, name="val")

# 💾 Save all metrics in one JSON file
metrics_path = "checkpoints/models/metrics.json"
with open(metrics_path, "w") as f:
    json.dump({**test_metrics, **val_metrics}, f, indent=4)
print(f"\n✅ All metrics saved to {metrics_path}")
clean_gpu()
/tmp/ipykernel_17261/2966811590.py:59: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
model.load_state_dict(torch.load("checkpoints/models/mlp_classifier.pt"))
📈 TEST METRICS: • test_macro_f1: 0.0482 • test_micro_f1: 0.4979 • test_hamming_loss: 0.0762 • test_macro_auc: 0.5001 📈 VAL METRICS: • val_macro_f1: 0.0468 • val_micro_f1: 0.4877 • val_hamming_loss: 0.0758 • val_macro_auc: 0.4984 ✅ All metrics saved to checkpoints/models/metrics.json
In [ ]:
# We see per class scores
In [63]:
from sklearn.metrics import classification_report
import numpy as np
import json
import os
# 📦 Load predictions and labels (from previous step)
test_preds = np.load("checkpoints/preds/test_preds.npy")
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)

# 🏷️ Get multilabel classes (same sorted ordering as used during training)
from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
mlb.fit(filtered_labels_df["Finding Labels"].str.split("|"))

# 🧾 Load test ground truth via the saved split indices
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
y_test = mlb.transform(filtered_labels_df["Finding Labels"].str.split("|"))[splits["test_idx"]]

# 📊 Generate per-class report (zero_division=0 silences empty-class warnings)
report = classification_report(
    y_test,
    test_preds,
    target_names=mlb.classes_,
    output_dict=True,
    zero_division=0
)

# 💾 Save report as JSON
os.makedirs("checkpoints/metrics", exist_ok=True)
with open("checkpoints/metrics/per_class_report.json", "w") as f:
    json.dump(report, f, indent=4)

# 🖨️ Print summary — keep only real class rows (drops avg/accuracy entries)
print("\n📋 PER-CLASS CLASSIFICATION REPORT (TOP 5 F1):")
f1s = {cls: vals['f1-score'] for cls, vals in report.items() if cls in mlb.classes_}
top5 = sorted(f1s.items(), key=lambda x: x[1], reverse=True)[:5]
for label, f1 in top5:
    print(f"• {label:20} — F1: {f1:.4f}")
📋 PER-CLASS CLASSIFICATION REPORT (TOP 5 F1): • No Finding — F1: 0.7234 • Atelectasis — F1: 0.0000 • Cardiomegaly — F1: 0.0000 • Consolidation — F1: 0.0000 • Edema — F1: 0.0000
In [ ]:
# visualization of f1 scores
In [64]:
import matplotlib.pyplot as plt
import json

# 📦 Load the per-class report produced by the evaluation cell
with open("checkpoints/metrics/per_class_report.json", "r") as f:
    report = json.load(f)

# 🎯 Keep only true class entries; drop sklearn's aggregate rows
aggregate_rows = {"accuracy", "macro avg", "weighted avg"}
f1_scores = {
    cls: vals['f1-score']
    for cls, vals in report.items()
    if cls not in aggregate_rows
}

# 🪄 Order classes from best to worst F1
sorted_f1 = sorted(f1_scores.items(), key=lambda item: item[1], reverse=True)
labels, scores = zip(*sorted_f1)

# 🎨 Horizontal bar chart with the best class at the top
fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(labels, scores, height=0.6)
ax.invert_yaxis()
ax.set_xlabel("F1 Score")
ax.set_title("Per-Class F1 Scores (Test Set)")
fig.tight_layout()

# 💾 Save the figure (relies on `os` imported in an earlier cell)
os.makedirs("checkpoints/plots", exist_ok=True)
fig.savefig("checkpoints/plots/per_class_f1_test.png", dpi=300)
plt.show()
In [ ]:
# Unet segmentation
In [65]:
import os
import pandas as pd
import torch
import numpy as np
from PIL import Image, ImageDraw
# 📁 Paths
bbox_path = "BBox_List_2017.csv"
bbox_mask_dir = "checkpoints/bbox_masks"
os.makedirs(bbox_mask_dir, exist_ok=True)

# 📦 Load bbox dataframe
# NOTE(review): the odd column names used below ("Bbox [x", "h]") must match
# this CSV's actual headers — do not "fix" them without checking the file.
bbox_df = pd.read_csv(bbox_path)

# 📦 Load image names in split (relies on `pickle`/`tqdm` from earlier cells)
with open("checkpoints/image_names.pkl", "rb") as f:
    image_names = pickle.load(f)

# 🔍 Filter only those bbox entries that match your current dataset
bbox_df = bbox_df[bbox_df["Image Index"].isin(image_names)]

# 🖼️ Output mask resolution (matches the preprocessed image size)
mask_size = (224, 224)

# ♻️ Clear GPU (redefined locally so the cell stands alone)
def clean_gpu():
    torch.cuda.empty_cache()
    import gc
    gc.collect()

# 🔁 Create binary masks: one [1, 224, 224] float tensor per image that has
# at least one bounding box. Images with no boxes get no mask file at all —
# downstream code must handle the missing-file case.
for img_name in tqdm(image_names, desc="🧠 Generating BBox masks"):
    img_bboxes = bbox_df[bbox_df["Image Index"] == img_name]
    if img_bboxes.empty:
        continue
    mask = Image.new("L", mask_size, 0)
    draw = ImageDraw.Draw(mask)
    for _, row in img_bboxes.iterrows():
        x, y, w, h = row["Bbox [x"], row["y"], row["w"], row["h]"]
        # Rescale box coordinates to the mask resolution.
        # NOTE(review): 1024 presumably = the source images' side length in
        # pixels — confirm against the raw data.
        x1 = int(x / 1024 * mask_size[0])
        y1 = int(y / 1024 * mask_size[1])
        x2 = int((x + w) / 1024 * mask_size[0])
        y2 = int((y + h) / 1024 * mask_size[1])
        draw.rectangle([x1, y1, x2, y2], fill=1)
    # Convert to tensor and save one file per image
    mask_tensor = torch.tensor(np.array(mask), dtype=torch.float32).unsqueeze(0)  # Shape: [1, 224, 224]
    torch.save(mask_tensor, os.path.join(bbox_mask_dir, img_name.replace(".png", ".pt")))

clean_gpu()
print("✅ All BBox-based masks saved to bbox_masks/")
🧠 Generating BBox masks: 100%|███████████| 4999/4999 [00:01<00:00, 4352.88it/s]
✅ All BBox-based masks saved to bbox_masks/
In [ ]:
# Defining Unet
In [66]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm
import gc
# 🧠 Lightweight U-Net (sufficient for mask training)
class UNet(nn.Module):
    """
    Minimal one-level-down encoder/decoder segmentation net.

    Pipeline: conv block → 2x max-pool → conv block → 2x bilinear upsample →
    conv block → 1x1 conv → sigmoid. Output has the same H×W as the input,
    one channel of per-pixel values in [0, 1].
    """

    def __init__(self):
        super().__init__()

        def CBR(in_c, out_c):
            # Conv → BatchNorm → ReLU building block
            return nn.Sequential(
                nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True),
            )

        # Submodule names kept identical so existing state_dicts still load.
        self.enc1 = CBR(1, 32)
        self.enc2 = CBR(32, 64)
        self.pool = nn.MaxPool2d(2, 2)
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.dec1 = CBR(64, 32)
        self.out = nn.Conv2d(32, 1, 1)

    def forward(self, x):
        """Return a (B, 1, H, W) mask for input of shape (B, 1, H, W)."""
        encoded = self.enc2(self.pool(self.enc1(x)))
        decoded = self.dec1(self.up(encoded))
        return torch.sigmoid(self.out(decoded))
# 🧹 Clean GPU
def clean_gpu():
    """Release cached CUDA memory, then force a garbage-collection pass."""
    torch.cuda.empty_cache()
    gc.collect()
# 📦 Custom dataset
class UNetDataset(Dataset):
    """
    Pairs preprocessed image tensors with their BBox-derived mask tensors.

    Images without a saved mask file yield an all-zero [1, 224, 224] mask,
    so every image remains usable for training.
    """

    def __init__(self, tensor_data, image_names, mask_dir):
        self.images = tensor_data        # position-indexable image tensors
        self.image_names = image_names   # filenames aligned with tensor_data
        self.mask_dir = mask_dir         # folder of per-image .pt mask files

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.images[idx]
        name = self.image_names[idx]
        mask_path = os.path.join(self.mask_dir, name.replace(".png", ".pt"))
        if os.path.exists(mask_path):
            # weights_only=True: mask files are plain tensors; this avoids the
            # unsafe pickle path and the FutureWarning torch.load raised here.
            mask = torch.load(mask_path, weights_only=True)
        else:
            mask = torch.zeros((1, 224, 224))
        return img, mask
In [ ]:
# Training unet
In [67]:
# 📦 Load train images & names
# NOTE(review): consider torch.load(..., weights_only=True) for plain-tensor
# checkpoints — the current call emits a pickle FutureWarning.
train_tensor = torch.load("checkpoints/train_tensor.pt")
with open("checkpoints/image_names.pkl", "rb") as f:
    image_names = pickle.load(f)
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
train_names = [image_names[i] for i in splits["train_idx"]]

# 🧠 Dataset + Loader (images without a mask file get all-zero masks)
train_ds = UNetDataset(train_tensor, train_names, mask_dir="checkpoints/bbox_masks")
train_dl = DataLoader(train_ds, batch_size=16, shuffle=True)

# 🎯 Train U-Net — BCELoss matches the sigmoid output of the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
unet = UNet().to(device)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(unet.parameters(), lr=1e-3)

epochs = 8
for epoch in range(epochs):
    unet.train()
    total_loss = 0
    for xb, yb in tqdm(train_dl, desc=f"🔁 Epoch {epoch+1}/{epochs}"):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = unet(xb)
        loss = loss_fn(out, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"📉 Epoch {epoch+1} Loss: {total_loss / len(train_dl):.4f}")
    clean_gpu()

# 💾 Save model weights (state_dict only)
os.makedirs("checkpoints/unet", exist_ok=True)
torch.save(unet.state_dict(), "checkpoints/unet/unet_bbox.pt")
print("✅ U-Net trained and saved to checkpoints/unet/unet_bbox.pt")
/tmp/ipykernel_17261/3395340981.py:2: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
train_tensor = torch.load("checkpoints/train_tensor.pt")
/tmp/ipykernel_17261/1579453490.py:52: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
mask = torch.load(mask_path) if os.path.exists(mask_path) else torch.zeros((1, 224, 224))
🔁 Epoch 1/8: 100%|███████████████████████████| 219/219 [00:15<00:00, 13.87it/s]
📉 Epoch 1 Loss: 0.2264
🔁 Epoch 2/8: 100%|███████████████████████████| 219/219 [00:15<00:00, 14.58it/s]
📉 Epoch 2 Loss: 0.0319
🔁 Epoch 3/8: 100%|███████████████████████████| 219/219 [00:15<00:00, 14.46it/s]
📉 Epoch 3 Loss: 0.0129
🔁 Epoch 4/8: 100%|███████████████████████████| 219/219 [00:14<00:00, 14.60it/s]
📉 Epoch 4 Loss: 0.0082
🔁 Epoch 5/8: 100%|███████████████████████████| 219/219 [00:14<00:00, 14.60it/s]
📉 Epoch 5 Loss: 0.0064
🔁 Epoch 6/8: 100%|███████████████████████████| 219/219 [00:14<00:00, 14.61it/s]
📉 Epoch 6 Loss: 0.0057
🔁 Epoch 7/8: 100%|███████████████████████████| 219/219 [00:14<00:00, 14.62it/s]
📉 Epoch 7 Loss: 0.0052
🔁 Epoch 8/8: 100%|███████████████████████████| 219/219 [00:14<00:00, 14.62it/s]
📉 Epoch 8 Loss: 0.0050 ✅ U-Net trained and saved to checkpoints/unet/unet_bbox.pt
In [ ]:
# self contained unet prediction
In [3]:
import os
import torch
import gc
import pickle
from tqdm import tqdm
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
# ♻️ GPU + RAM cleanup
def clean_gpu():
    """Free Python garbage, then release cached CUDA memory.

    `gc.collect()` runs FIRST so that tensors made unreachable in Python
    are actually destroyed; only then can `torch.cuda.empty_cache()`
    return their cached blocks to the driver. (The original called
    empty_cache before collect, which leaves still-referenced garbage
    in the caching allocator.)  Safe to call on CPU-only machines:
    empty_cache is a no-op when CUDA is not initialized.
    """
    gc.collect()
    torch.cuda.empty_cache()
# 🧠 Define lightweight U-Net again
class UNet(nn.Module):
def __init__(self):
super().__init__()
def CBR(in_c, out_c):
return nn.Sequential(
nn.Conv2d(in_c, out_c, 3, padding=1),
nn.BatchNorm2d(out_c),
nn.ReLU(inplace=True)
)
self.enc1 = CBR(1, 32)
self.enc2 = CBR(32, 64)
self.pool = nn.MaxPool2d(2, 2)
self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.dec1 = CBR(64, 32)
self.out = nn.Conv2d(32, 1, 1)
def forward(self, x):
x1 = self.enc1(x)
x2 = self.pool(x1)
x3 = self.enc2(x2)
x4 = self.up(x3)
x5 = self.dec1(x4)
return torch.sigmoid(self.out(x5))
# 📦 Dataset class
class UNetDataset(Dataset):
    """Pairs preprocessed image tensors with their saved mask tensors.

    Parameters
    ----------
    tensor_data : indexable of image tensors, aligned with ``image_names``.
    image_names : list of ``*.png`` filenames; the mask for an image is
        expected at ``<mask_dir>/<name with .pt extension>``.
    mask_dir : directory holding per-image masks saved via ``torch.save``.

    Images whose mask file is missing get an all-zero ``(1, 224, 224)``
    mask so training never crashes on incomplete annotation coverage
    (images are presumably 224x224 as well — TODO confirm).
    """

    def __init__(self, tensor_data, image_names, mask_dir):
        self.images = tensor_data
        self.image_names = image_names
        self.mask_dir = mask_dir

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img = self.images[idx]
        name = self.image_names[idx]
        mask_path = os.path.join(self.mask_dir, name.replace(".png", ".pt"))
        if os.path.exists(mask_path):
            # weights_only=True: the mask files are plain tensors, and this
            # avoids unpickling arbitrary objects (fixes the torch.load
            # FutureWarning seen in the cell output).
            mask = torch.load(mask_path, weights_only=True)
        else:
            mask = torch.zeros((1, 224, 224))
        return img, mask
# 📂 Load tensor + names
# weights_only=True: the checkpoint is a plain tensor; avoids unpickling
# arbitrary objects (fixes the torch.load FutureWarning in the output).
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt", weights_only=True)
with open("checkpoints/image_names.pkl", "rb") as f:
    all_image_names = pickle.load(f)

# 🔁 DataLoader — shuffle=False keeps batch order aligned with all_image_names,
# which the save loop below relies on via global_idx.
full_ds = UNetDataset(processed_images_tensor, all_image_names, mask_dir="checkpoints/bbox_masks")
full_dl = DataLoader(full_ds, batch_size=8, shuffle=False)

# 🎯 Load U-Net model (map_location so a CUDA-trained checkpoint also loads on CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
unet = UNet().to(device)
unet.load_state_dict(torch.load("checkpoints/unet/unet_bbox.pt", map_location=device, weights_only=True))
unet.eval()

# 📁 Save masks to:
save_dir = "checkpoints/unet_masks"
os.makedirs(save_dir, exist_ok=True)

# 🔮 Predict + save safely: stream batches, write each predicted mask to disk
# immediately, and free GPU memory per batch so the full set fits in RAM.
with torch.no_grad():
    global_idx = 0
    for xb, _ in tqdm(full_dl, desc="🔮 Streaming U-Net mask predictions"):
        xb = xb.to(device)
        out = unet(xb).cpu()
        for j in range(out.size(0)):
            img_name = all_image_names[global_idx]
            torch.save(out[j], os.path.join(save_dir, img_name.replace(".png", ".pt")))
            global_idx += 1
        del xb, out
        clean_gpu()

print("✅ All U-Net predicted masks saved to checkpoints/unet_masks/")
/tmp/ipykernel_20600/1314883253.py:57: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
/tmp/ipykernel_20600/1314883253.py:68: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
unet.load_state_dict(torch.load("checkpoints/unet/unet_bbox.pt"))
/tmp/ipykernel_20600/1314883253.py:53: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
mask = torch.load(mask_path) if os.path.exists(mask_path) else torch.zeros((1, 224, 224))
🔮 Streaming U-Net mask predictions: 100%|████| 625/625 [00:44<00:00, 14.13it/s]
✅ All U-Net predicted masks saved to checkpoints/unet_masks/
In [ ]:
# Grad-CAM fusion overlays: CAM + U-Net mask + BBox mask, test set only
In [8]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import gc
from PIL import Image
from torchvision import models
from tqdm import tqdm
# ♻️ Clean memory
def clean_gpu():
    """Free Python garbage, then release cached CUDA memory.

    `gc.collect()` runs FIRST so that tensors made unreachable in Python
    are actually destroyed; only then can `torch.cuda.empty_cache()`
    return their cached blocks to the driver. (The original called
    empty_cache before collect, which leaves still-referenced garbage
    in the caching allocator.)  Safe to call on CPU-only machines:
    empty_cache is a no-op when CUDA is not initialized.
    """
    gc.collect()
    torch.cuda.empty_cache()
# 🎯 Grad-CAM Hook
class GradCAM:
    """Minimal Grad-CAM: hook one layer, weight its activations by the
    spatially-averaged gradient of a class score.

    Parameters
    ----------
    model : torch.nn.Module — put into eval mode; must contain
        ``target_layer`` among its named modules.
    target_layer : str — dotted module name (e.g. "layer4") whose
        forward activations and backward gradients are captured.
    """

    def __init__(self, model, target_layer):
        self.model = model.eval()
        self.target_layer = target_layer
        self.gradients = None   # grad of score w.r.t. target-layer output
        self.activations = None  # target-layer forward output
        self.hook()

    def hook(self):
        """Attach forward/backward hooks to the target layer."""
        def forward_hook(module, input, output):
            self.activations = output.detach()

        def backward_hook(module, grad_input, grad_output):
            self.gradients = grad_output[0].detach()

        layer = dict([*self.model.named_modules()])[self.target_layer]
        layer.register_forward_hook(forward_hook)
        # register_full_backward_hook replaces the deprecated
        # register_backward_hook, which could report incorrect gradients
        # for modules with multiple inputs/outputs.
        layer.register_full_backward_hook(backward_hook)

    def __call__(self, x, index=None):
        """Return a (224, 224) numpy CAM in [0, 1] for class ``index``
        (defaults to the model's top-scoring class for this input).

        NOTE(review): written for batch size 1 — ``torch.argmax(out)``
        flattens over the batch and ``score.backward()`` requires a
        single element.
        """
        self.model.zero_grad()
        out = self.model(x)
        if index is None:
            index = torch.argmax(out)
        score = out[:, index]
        score.backward()
        # Channel weights: global-average-pooled gradients (Grad-CAM paper eq. 1).
        weights = self.gradients.mean(dim=[2, 3], keepdim=True)
        cam = (weights * self.activations).sum(dim=1, keepdim=True)
        cam = torch.nn.functional.relu(cam)
        cam = torch.nn.functional.interpolate(cam, size=(224, 224), mode='bilinear', align_corners=False)
        # Min-max normalize to [0, 1]; epsilon guards an all-zero map.
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam.squeeze().cpu().numpy()
# 📦 Load essentials
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs("checkpoints/overlays", exist_ok=True)

# weights_only=True throughout: all checkpoints here are plain tensors, so
# there is no need to allow arbitrary unpickling (fixes the torch.load
# FutureWarnings visible in this cell's output).
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt", weights_only=True)
with open("checkpoints/image_names.pkl", "rb") as f:
    image_names = pickle.load(f)
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
test_idx = splits["test_idx"]

# Load test predictions (750 rows, aligned row-for-row with test_idx)
test_preds = np.load("checkpoints/preds/test_preds.npy")

# 🔁 Load full ResNet18 model with a 15-class head
from torchvision.models import resnet18, ResNet18_Weights

resnet = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
resnet.fc = nn.Linear(512, 15)
# NOTE(review): loading "mlp_classifier.pt" with strict=False means any keys
# that don't match ResNet18 are silently skipped — confirm this checkpoint
# actually contains the intended backbone/head weights.
resnet.load_state_dict(
    torch.load("checkpoints/models/mlp_classifier.pt", map_location=device, weights_only=True),
    strict=False,
)
resnet.to(device).eval()

# ✅ Grad-CAM hooked on the last conv stage
cam_generator = GradCAM(resnet, target_layer="layer4")

# 🔁 Generate overlays for test set only
for i, idx in enumerate(tqdm(test_idx, desc="🎨 Grad-CAM Fusion (Test Set Only)")):
    img_name = image_names[idx]
    try:
        # Skip images whose U-Net or BBox mask was never produced
        mask_file = img_name.replace('.png', '.pt')
        unet_path = f"checkpoints/unet_masks/{mask_file}"
        bbox_path = f"checkpoints/bbox_masks/{mask_file}"
        if not os.path.exists(unet_path) or not os.path.exists(bbox_path):
            print(f"⛔ Skipping {img_name} — missing U-Net or BBox mask")
            continue

        # Load grayscale image; replicate to 3 channels for the ResNet input
        img_tensor = processed_images_tensor[idx].unsqueeze(0).to(device)
        img_rgb = img_tensor.repeat(1, 3, 1, 1)

        # Grad-CAM for the highest-scoring predicted class
        # (test_preds row i corresponds to test_idx position i)
        top_class = np.argmax(test_preds[i])
        cam = cam_generator(img_rgb, index=top_class)

        # Load masks (plain tensors, saved on CPU)
        unet_mask = torch.load(unet_path, weights_only=True).squeeze().numpy()
        bbox_mask = torch.load(bbox_path, weights_only=True).squeeze().numpy()

        # Scale everything to 0-255 uint8 for display
        cam = (cam * 255).astype(np.uint8)
        unet_mask = (unet_mask * 255).astype(np.uint8)
        bbox_mask = (bbox_mask * 255).astype(np.uint8)

        # Plot original, Grad-CAM overlay, U-Net mask, BBox mask side by side
        fig, axs = plt.subplots(1, 4, figsize=(12, 3))
        axs[0].imshow(img_tensor.squeeze().cpu(), cmap='gray')
        axs[0].set_title("Original")
        axs[1].imshow(img_tensor.squeeze().cpu(), cmap='gray')
        axs[1].imshow(cam, cmap='jet', alpha=0.4)
        axs[1].set_title("Grad-CAM")
        axs[2].imshow(unet_mask, cmap='gray')
        axs[2].set_title("U-Net")
        axs[3].imshow(bbox_mask, cmap='gray')
        axs[3].set_title("BBox")
        for ax in axs:
            ax.axis('off')
        plt.tight_layout()
        # (the original built this path with a no-op replace('.png', '.png'))
        plt.savefig(f"checkpoints/overlays/{img_name}", dpi=150)
        plt.close(fig)  # close this exact figure so memory doesn't grow per image

        del img_tensor, img_rgb, cam, unet_mask, bbox_mask
        clean_gpu()
    except Exception as e:
        # Best-effort loop: report the failure and keep going
        print(f"⚠️ Failed on {img_name}: {e}")
        clean_gpu()
        continue

print("✅ All Grad-CAM overlays (test set) saved to checkpoints/overlays/")
/tmp/ipykernel_20600/2449209814.py:53: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
/tmp/ipykernel_20600/2449209814.py:69: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
resnet.load_state_dict(torch.load("checkpoints/models/mlp_classifier.pt", map_location=device), strict=False)
/tmp/ipykernel_20600/2449209814.py:96: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
unet_mask = torch.load(unet_path).squeeze().numpy()
/tmp/ipykernel_20600/2449209814.py:97: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
bbox_mask = torch.load(bbox_path).squeeze().numpy()
⛔ Skipping 00000132_004.png — missing U-Net or BBox mask ⛔ Skipping 00000214_000.png — missing U-Net or BBox mask ⛔ Skipping 00000779_000.png — missing U-Net or BBox mask ⛔ Skipping 00001232_004.png — missing U-Net or BBox mask ⛔ Skipping 00000798_007.png — missing U-Net or BBox mask ⛔ Skipping 00000344_000.png — missing U-Net or BBox mask ⛔ Skipping 00000820_022.png — missing U-Net or BBox mask ⛔ Skipping 00000230_004.png — missing U-Net or BBox mask ⛔ Skipping 00000927_004.png — missing U-Net or BBox mask ⛔ Skipping 00001003_000.png — missing U-Net or BBox mask ⛔ Skipping 00000877_032.png — missing U-Net or BBox mask ⛔ Skipping 00000583_000.png — missing U-Net or BBox mask ⛔ Skipping 00000832_001.png — missing U-Net or BBox mask ⛔ Skipping 00000506_001.png — missing U-Net or BBox mask ⛔ Skipping 00000183_000.png — missing U-Net or BBox mask ⛔ Skipping 00001203_008.png — missing U-Net or BBox mask ⛔ Skipping 00000985_000.png — missing U-Net or BBox mask ⛔ Skipping 00000050_000.png — missing U-Net or BBox mask ⛔ Skipping 00001075_007.png — missing U-Net or BBox mask ⛔ Skipping 00000979_003.png — missing U-Net or BBox mask ⛔ Skipping 00000211_023.png — missing U-Net or BBox mask ⛔ Skipping 00000167_001.png — missing U-Net or BBox mask ⛔ Skipping 00001247_011.png — missing U-Net or BBox mask
🎨 Grad-CAM Fusion (Test Set Only): 3%|▏ | 24/750 [00:00<00:18, 38.91it/s]
⛔ Skipping 00001151_008.png — missing U-Net or BBox mask ⛔ Skipping 00000583_041.png — missing U-Net or BBox mask ⛔ Skipping 00001200_012.png — missing U-Net or BBox mask ⛔ Skipping 00000732_001.png — missing U-Net or BBox mask ⛔ Skipping 00000231_010.png — missing U-Net or BBox mask ⛔ Skipping 00000796_002.png — missing U-Net or BBox mask ⛔ Skipping 00000307_000.png — missing U-Net or BBox mask ⛔ Skipping 00000103_006.png — missing U-Net or BBox mask ⛔ Skipping 00000165_004.png — missing U-Net or BBox mask ⛔ Skipping 00000289_000.png — missing U-Net or BBox mask ⛔ Skipping 00001203_012.png — missing U-Net or BBox mask ⛔ Skipping 00000491_018.png — missing U-Net or BBox mask ⛔ Skipping 00001286_002.png — missing U-Net or BBox mask ⛔ Skipping 00000116_040.png — missing U-Net or BBox mask ⛔ Skipping 00000030_000.png — missing U-Net or BBox mask ⛔ Skipping 00001004_000.png — missing U-Net or BBox mask ⛔ Skipping 00000710_006.png — missing U-Net or BBox mask ⛔ Skipping 00000974_010.png — missing U-Net or BBox mask ⛔ Skipping 00001161_001.png — missing U-Net or BBox mask ⛔ Skipping 00001100_000.png — missing U-Net or BBox mask ⛔ Skipping 00000663_000.png — missing U-Net or BBox mask ⛔ Skipping 00000116_017.png — missing U-Net or BBox mask ⛔ Skipping 00001070_000.png — missing U-Net or BBox mask ⛔ Skipping 00001047_001.png — missing U-Net or BBox mask ⛔ Skipping 00001304_000.png — missing U-Net or BBox mask ⛔ Skipping 00000885_001.png — missing U-Net or BBox mask ⛔ Skipping 00000230_000.png — missing U-Net or BBox mask ⛔ Skipping 00001263_000.png — missing U-Net or BBox mask ⛔ Skipping 00000770_000.png — missing U-Net or BBox mask ⛔ Skipping 00000153_000.png — missing U-Net or BBox mask ⛔ Skipping 00001315_001.png — missing U-Net or BBox mask ⛔ Skipping 00001200_004.png — missing U-Net or BBox mask ⛔ Skipping 00000250_002.png — missing U-Net or BBox mask ⛔ Skipping 00000181_054.png — missing U-Net or BBox mask ⛔ Skipping 00000935_001.png — missing U-Net or BBox mask ⛔ 
Skipping 00000549_000.png — missing U-Net or BBox mask ⛔ Skipping 00001308_001.png — missing U-Net or BBox mask ⛔ Skipping 00001247_005.png — missing U-Net or BBox mask ⛔ Skipping 00000090_005.png — missing U-Net or BBox mask ⛔ Skipping 00001055_006.png — missing U-Net or BBox mask ⛔ Skipping 00001314_001.png — missing U-Net or BBox mask ⛔ Skipping 00000091_007.png — missing U-Net or BBox mask ⛔ Skipping 00001202_002.png — missing U-Net or BBox mask ⛔ Skipping 00000038_006.png — missing U-Net or BBox mask ⛔ Skipping 00000798_002.png — missing U-Net or BBox mask ⛔ Skipping 00000547_000.png — missing U-Net or BBox mask ⛔ Skipping 00001305_000.png — missing U-Net or BBox mask ⛔ Skipping 00000914_005.png — missing U-Net or BBox mask ⛔ Skipping 00000753_001.png — missing U-Net or BBox mask ⛔ Skipping 00001075_017.png — missing U-Net or BBox mask ⛔ Skipping 00000459_028.png — missing U-Net or BBox mask ⛔ Skipping 00000630_000.png — missing U-Net or BBox mask ⛔ Skipping 00000300_000.png — missing U-Net or BBox mask ⛔ Skipping 00000177_003.png — missing U-Net or BBox mask ⛔ Skipping 00000731_007.png — missing U-Net or BBox mask ⛔ Skipping 00000680_000.png — missing U-Net or BBox mask ⛔ Skipping 00000901_001.png — missing U-Net or BBox mask ⛔ Skipping 00000861_004.png — missing U-Net or BBox mask ⛔ Skipping 00001326_006.png — missing U-Net or BBox mask ⛔ Skipping 00000820_007.png — missing U-Net or BBox mask ⛔ Skipping 00000386_000.png — missing U-Net or BBox mask ⛔ Skipping 00000092_002.png — missing U-Net or BBox mask ⛔ Skipping 00000489_008.png — missing U-Net or BBox mask ⛔ Skipping 00000442_001.png — missing U-Net or BBox mask ⛔ Skipping 00000413_000.png — missing U-Net or BBox mask ⛔ Skipping 00000558_000.png — missing U-Net or BBox mask ⛔ Skipping 00000943_000.png — missing U-Net or BBox mask ⛔ Skipping 00001284_001.png — missing U-Net or BBox mask ⛔ Skipping 00000208_000.png — missing U-Net or BBox mask ⛔ Skipping 00000535_000.png — missing U-Net or BBox mask ⛔ 
Skipping 00000707_004.png — missing U-Net or BBox mask ⛔ Skipping 00000658_000.png — missing U-Net or BBox mask ⛔ Skipping 00000514_005.png — missing U-Net or BBox mask ⛔ Skipping 00000632_003.png — missing U-Net or BBox mask ⛔ Skipping 00001180_000.png — missing U-Net or BBox mask ⛔ Skipping 00000798_025.png — missing U-Net or BBox mask ⛔ Skipping 00000627_001.png — missing U-Net or BBox mask ⛔ Skipping 00000744_005.png — missing U-Net or BBox mask ⛔ Skipping 00000683_004.png — missing U-Net or BBox mask ⛔ Skipping 00000703_000.png — missing U-Net or BBox mask ⛔ Skipping 00000522_000.png — missing U-Net or BBox mask ⛔ Skipping 00000049_000.png — missing U-Net or BBox mask ⛔ Skipping 00001232_006.png — missing U-Net or BBox mask ⛔ Skipping 00000877_015.png — missing U-Net or BBox mask ⛔ Skipping 00001194_003.png — missing U-Net or BBox mask ⛔ Skipping 00000880_000.png — missing U-Net or BBox mask ⛔ Skipping 00000211_043.png — missing U-Net or BBox mask ⛔ Skipping 00000128_000.png — missing U-Net or BBox mask ⛔ Skipping 00001298_001.png — missing U-Net or BBox mask ⛔ Skipping 00000872_006.png — missing U-Net or BBox mask ⛔ Skipping 00001247_004.png — missing U-Net or BBox mask ⛔ Skipping 00001200_011.png — missing U-Net or BBox mask ⛔ Skipping 00000839_000.png — missing U-Net or BBox mask ⛔ Skipping 00001248_002.png — missing U-Net or BBox mask ⛔ Skipping 00001256_008.png — missing U-Net or BBox mask ⛔ Skipping 00000061_014.png — missing U-Net or BBox mask ⛔ Skipping 00000845_002.png — missing U-Net or BBox mask ⛔ Skipping 00000938_001.png — missing U-Net or BBox mask ⛔ Skipping 00000597_001.png — missing U-Net or BBox mask ⛔ Skipping 00000963_010.png — missing U-Net or BBox mask ⛔ Skipping 00000867_000.png — missing U-Net or BBox mask ⛔ Skipping 00000691_000.png — missing U-Net or BBox mask ⛔ Skipping 00000376_013.png — missing U-Net or BBox mask ⛔ Skipping 00000591_018.png — missing U-Net or BBox mask ⛔ Skipping 00001280_001.png — missing U-Net or BBox mask ⛔ 
Skipping 00001249_003.png — missing U-Net or BBox mask ⛔ Skipping 00000465_001.png — missing U-Net or BBox mask ⛔ Skipping 00000670_000.png — missing U-Net or BBox mask ⛔ Skipping 00000847_001.png — missing U-Net or BBox mask ⛔ Skipping 00000074_000.png — missing U-Net or BBox mask ⛔ Skipping 00000013_032.png — missing U-Net or BBox mask ⛔ Skipping 00000054_006.png — missing U-Net or BBox mask ⛔ Skipping 00001075_016.png — missing U-Net or BBox mask ⛔ Skipping 00000754_000.png — missing U-Net or BBox mask ⛔ Skipping 00001068_005.png — missing U-Net or BBox mask ⛔ Skipping 00001006_005.png — missing U-Net or BBox mask ⛔ Skipping 00001286_000.png — missing U-Net or BBox mask ⛔ Skipping 00000861_000.png — missing U-Net or BBox mask ⛔ Skipping 00000219_002.png — missing U-Net or BBox mask ⛔ Skipping 00000468_023.png — missing U-Net or BBox mask ⛔ Skipping 00001255_025.png — missing U-Net or BBox mask ⛔ Skipping 00000624_005.png — missing U-Net or BBox mask ⛔ Skipping 00001101_009.png — missing U-Net or BBox mask ⛔ Skipping 00001320_000.png — missing U-Net or BBox mask ⛔ Skipping 00000116_034.png — missing U-Net or BBox mask ⛔ Skipping 00000781_005.png — missing U-Net or BBox mask ⛔ Skipping 00000766_007.png — missing U-Net or BBox mask ⛔ Skipping 00000090_007.png — missing U-Net or BBox mask ⛔ Skipping 00000052_001.png — missing U-Net or BBox mask ⛔ Skipping 00000798_034.png — missing U-Net or BBox mask ⛔ Skipping 00001206_005.png — missing U-Net or BBox mask ⛔ Skipping 00000704_002.png — missing U-Net or BBox mask ⛔ Skipping 00000340_000.png — missing U-Net or BBox mask ⛔ Skipping 00000623_005.png — missing U-Net or BBox mask ⛔ Skipping 00000415_001.png — missing U-Net or BBox mask ⛔ Skipping 00000250_000.png — missing U-Net or BBox mask ⛔ Skipping 00001158_000.png — missing U-Net or BBox mask ⛔ Skipping 00000620_001.png — missing U-Net or BBox mask ⛔ Skipping 00000478_000.png — missing U-Net or BBox mask ⛔ Skipping 00001122_006.png — missing U-Net or BBox mask ⛔ 
Skipping 00001170_004.png — missing U-Net or BBox mask ⛔ Skipping 00000322_000.png — missing U-Net or BBox mask ⛔ Skipping 00000556_001.png — missing U-Net or BBox mask ⛔ Skipping 00001297_000.png — missing U-Net or BBox mask ⛔ Skipping 00001174_000.png — missing U-Net or BBox mask ⛔ Skipping 00001255_011.png — missing U-Net or BBox mask ⛔ Skipping 00000116_035.png — missing U-Net or BBox mask ⛔ Skipping 00001278_005.png — missing U-Net or BBox mask ⛔ Skipping 00001075_031.png — missing U-Net or BBox mask ⛔ Skipping 00000054_009.png — missing U-Net or BBox mask ⛔ Skipping 00000106_000.png — missing U-Net or BBox mask ⛔ Skipping 00001045_000.png — missing U-Net or BBox mask ⛔ Skipping 00000116_010.png — missing U-Net or BBox mask ⛔ Skipping 00000888_000.png — missing U-Net or BBox mask ⛔ Skipping 00000013_005.png — missing U-Net or BBox mask ⛔ Skipping 00000121_004.png — missing U-Net or BBox mask ⛔ Skipping 00000425_000.png — missing U-Net or BBox mask ⛔ Skipping 00001088_006.png — missing U-Net or BBox mask ⛔ Skipping 00000734_000.png — missing U-Net or BBox mask ⛔ Skipping 00000011_001.png — missing U-Net or BBox mask ⛔ Skipping 00000211_011.png — missing U-Net or BBox mask ⛔ Skipping 00000248_007.png — missing U-Net or BBox mask ⛔ Skipping 00001255_024.png — missing U-Net or BBox mask ⛔ Skipping 00001018_003.png — missing U-Net or BBox mask ⛔ Skipping 00000209_000.png — missing U-Net or BBox mask ⛔ Skipping 00001200_006.png — missing U-Net or BBox mask ⛔ Skipping 00001006_012.png — missing U-Net or BBox mask ⛔ Skipping 00000551_000.png — missing U-Net or BBox mask ⛔ Skipping 00001331_000.png — missing U-Net or BBox mask ⛔ Skipping 00000597_000.png — missing U-Net or BBox mask ⛔ Skipping 00001129_002.png — missing U-Net or BBox mask ⛔ Skipping 00000631_003.png — missing U-Net or BBox mask ⛔ Skipping 00001247_002.png — missing U-Net or BBox mask ⛔ Skipping 00000623_001.png — missing U-Net or BBox mask ⛔ Skipping 00001187_006.png — missing U-Net or BBox mask ⛔ 
Skipping 00000467_002.png — missing U-Net or BBox mask ⛔ Skipping 00001016_000.png — missing U-Net or BBox mask ⛔ Skipping 00000974_001.png — missing U-Net or BBox mask ⛔ Skipping 00000371_000.png — missing U-Net or BBox mask ⛔ Skipping 00000583_001.png — missing U-Net or BBox mask ⛔ Skipping 00000831_012.png — missing U-Net or BBox mask ⛔ Skipping 00001230_004.png — missing U-Net or BBox mask ⛔ Skipping 00001088_019.png — missing U-Net or BBox mask ⛔ Skipping 00000861_001.png — missing U-Net or BBox mask ⛔ Skipping 00000055_000.png — missing U-Net or BBox mask ⛔ Skipping 00001221_001.png — missing U-Net or BBox mask ⛔ Skipping 00000615_000.png — missing U-Net or BBox mask ⛔ Skipping 00000370_008.png — missing U-Net or BBox mask ⛔ Skipping 00000093_001.png — missing U-Net or BBox mask ⛔ Skipping 00001303_000.png — missing U-Net or BBox mask ⛔ Skipping 00000449_000.png — missing U-Net or BBox mask ⛔ Skipping 00000511_001.png — missing U-Net or BBox mask ⛔ Skipping 00000273_006.png — missing U-Net or BBox mask ⛔ Skipping 00000315_002.png — missing U-Net or BBox mask ⛔ Skipping 00000618_011.png — missing U-Net or BBox mask ⛔ Skipping 00000193_002.png — missing U-Net or BBox mask ⛔ Skipping 00001255_007.png — missing U-Net or BBox mask ⛔ Skipping 00000788_001.png — missing U-Net or BBox mask ⛔ Skipping 00001039_004.png — missing U-Net or BBox mask ⛔ Skipping 00001206_007.png — missing U-Net or BBox mask ⛔ Skipping 00000816_001.png — missing U-Net or BBox mask ⛔ Skipping 00000466_000.png — missing U-Net or BBox mask ⛔ Skipping 00001006_017.png — missing U-Net or BBox mask ⛔ Skipping 00000716_000.png — missing U-Net or BBox mask ⛔ Skipping 00000962_000.png — missing U-Net or BBox mask ⛔ Skipping 00001077_000.png — missing U-Net or BBox mask ⛔ Skipping 00000041_006.png — missing U-Net or BBox mask ⛔ Skipping 00001200_009.png — missing U-Net or BBox mask ⛔ Skipping 00001230_008.png — missing U-Net or BBox mask ⛔ Skipping 00001335_006.png — missing U-Net or BBox mask ⛔ 
Skipping 00000022_001.png — missing U-Net or BBox mask ⛔ Skipping 00000761_010.png — missing U-Net or BBox mask ⛔ Skipping 00000368_001.png — missing U-Net or BBox mask ⛔ Skipping 00000847_000.png — missing U-Net or BBox mask ⛔ Skipping 00000054_005.png — missing U-Net or BBox mask ⛔ Skipping 00001268_002.png — missing U-Net or BBox mask ⛔ Skipping 00001179_000.png — missing U-Net or BBox mask ⛔ Skipping 00000882_004.png — missing U-Net or BBox mask ⛔ Skipping 00000549_003.png — missing U-Net or BBox mask ⛔ Skipping 00000047_006.png — missing U-Net or BBox mask ⛔ Skipping 00001093_007.png — missing U-Net or BBox mask ⛔ Skipping 00001151_004.png — missing U-Net or BBox mask ⛔ Skipping 00001317_001.png — missing U-Net or BBox mask ⛔ Skipping 00000466_003.png — missing U-Net or BBox mask ⛔ Skipping 00001039_002.png — missing U-Net or BBox mask ⛔ Skipping 00000877_030.png — missing U-Net or BBox mask ⛔ Skipping 00001029_016.png — missing U-Net or BBox mask ⛔ Skipping 00000618_004.png — missing U-Net or BBox mask ⛔ Skipping 00001156_000.png — missing U-Net or BBox mask ⛔ Skipping 00000148_000.png — missing U-Net or BBox mask ⛔ Skipping 00000835_000.png — missing U-Net or BBox mask ⛔ Skipping 00000039_002.png — missing U-Net or BBox mask ⛔ Skipping 00000468_014.png — missing U-Net or BBox mask ⛔ Skipping 00001301_021.png — missing U-Net or BBox mask ⛔ Skipping 00000627_004.png — missing U-Net or BBox mask ⛔ Skipping 00001093_011.png — missing U-Net or BBox mask ⛔ Skipping 00001075_008.png — missing U-Net or BBox mask ⛔ Skipping 00001018_005.png — missing U-Net or BBox mask ⛔ Skipping 00000490_003.png — missing U-Net or BBox mask ⛔ Skipping 00000549_002.png — missing U-Net or BBox mask ⛔ Skipping 00000057_004.png — missing U-Net or BBox mask ⛔ Skipping 00000798_026.png — missing U-Net or BBox mask ⛔ Skipping 00001136_003.png — missing U-Net or BBox mask ⛔ Skipping 00001021_004.png — missing U-Net or BBox mask ⛔ Skipping 00001164_000.png — missing U-Net or BBox mask ⛔ 
Skipping 00001255_032.png — missing U-Net or BBox mask ⛔ Skipping 00000090_001.png — missing U-Net or BBox mask ⛔ Skipping 00000709_006.png — missing U-Net or BBox mask ⛔ Skipping 00000270_001.png — missing U-Net or BBox mask ⛔ Skipping 00001230_002.png — missing U-Net or BBox mask ⛔ Skipping 00001138_000.png — missing U-Net or BBox mask ⛔ Skipping 00000595_000.png — missing U-Net or BBox mask ⛔ Skipping 00000065_000.png — missing U-Net or BBox mask ⛔ Skipping 00000632_001.png — missing U-Net or BBox mask ⛔ Skipping 00000744_007.png — missing U-Net or BBox mask ⛔ Skipping 00000870_013.png — missing U-Net or BBox mask ⛔ Skipping 00001248_014.png — missing U-Net or BBox mask ⛔ Skipping 00000181_058.png — missing U-Net or BBox mask ⛔ Skipping 00000005_001.png — missing U-Net or BBox mask ⛔ Skipping 00000392_000.png — missing U-Net or BBox mask ⛔ Skipping 00000538_003.png — missing U-Net or BBox mask
🎨 Grad-CAM Fusion (Test Set Only): 38%|█▌ | 286/750 [00:01<00:01, 300.15it/s]
⛔ Skipping 00000131_000.png — missing U-Net or BBox mask ⛔ Skipping 00000311_000.png — missing U-Net or BBox mask ⛔ Skipping 00000195_000.png — missing U-Net or BBox mask ⛔ Skipping 00000044_001.png — missing U-Net or BBox mask ⛔ Skipping 00000627_035.png — missing U-Net or BBox mask ⛔ Skipping 00000798_024.png — missing U-Net or BBox mask ⛔ Skipping 00000859_000.png — missing U-Net or BBox mask ⛔ Skipping 00000376_001.png — missing U-Net or BBox mask ⛔ Skipping 00000583_059.png — missing U-Net or BBox mask ⛔ Skipping 00001202_000.png — missing U-Net or BBox mask ⛔ Skipping 00000627_018.png — missing U-Net or BBox mask ⛔ Skipping 00000108_001.png — missing U-Net or BBox mask ⛔ Skipping 00000468_042.png — missing U-Net or BBox mask ⛔ Skipping 00000090_002.png — missing U-Net or BBox mask ⛔ Skipping 00001250_004.png — missing U-Net or BBox mask ⛔ Skipping 00000830_003.png — missing U-Net or BBox mask ⛔ Skipping 00000467_005.png — missing U-Net or BBox mask ⛔ Skipping 00000193_005.png — missing U-Net or BBox mask ⛔ Skipping 00000980_002.png — missing U-Net or BBox mask ⛔ Skipping 00000805_000.png — missing U-Net or BBox mask ⛔ Skipping 00000038_001.png — missing U-Net or BBox mask ⛔ Skipping 00000958_000.png — missing U-Net or BBox mask ⛔ Skipping 00001298_000.png — missing U-Net or BBox mask ⛔ Skipping 00001200_016.png — missing U-Net or BBox mask ⛔ Skipping 00000757_001.png — missing U-Net or BBox mask ⛔ Skipping 00000830_004.png — missing U-Net or BBox mask ⛔ Skipping 00000372_011.png — missing U-Net or BBox mask ⛔ Skipping 00000901_004.png — missing U-Net or BBox mask ⛔ Skipping 00001107_000.png — missing U-Net or BBox mask ⛔ Skipping 00001187_000.png — missing U-Net or BBox mask ⛔ Skipping 00000194_002.png — missing U-Net or BBox mask ⛔ Skipping 00000181_059.png — missing U-Net or BBox mask ⛔ Skipping 00001006_013.png — missing U-Net or BBox mask ⛔ Skipping 00000127_006.png — missing U-Net or BBox mask ⛔ Skipping 00000494_001.png — missing U-Net or BBox mask ⛔ 
Skipping 00000321_003.png — missing U-Net or BBox mask ⛔ Skipping 00000196_000.png — missing U-Net or BBox mask ⛔ Skipping 00000459_042.png — missing U-Net or BBox mask ⛔ Skipping 00000322_009.png — missing U-Net or BBox mask ⛔ Skipping 00001203_009.png — missing U-Net or BBox mask ⛔ Skipping 00000627_012.png — missing U-Net or BBox mask ⛔ Skipping 00000289_006.png — missing U-Net or BBox mask ⛔ Skipping 00000032_035.png — missing U-Net or BBox mask ⛔ Skipping 00000310_006.png — missing U-Net or BBox mask ⛔ Skipping 00001224_001.png — missing U-Net or BBox mask ⛔ Skipping 00000733_003.png — missing U-Net or BBox mask ⛔ Skipping 00000611_000.png — missing U-Net or BBox mask ⛔ Skipping 00001050_000.png — missing U-Net or BBox mask ⛔ Skipping 00000127_004.png — missing U-Net or BBox mask ⛔ Skipping 00000462_000.png — missing U-Net or BBox mask ⛔ Skipping 00000538_002.png — missing U-Net or BBox mask ⛔ Skipping 00000165_003.png — missing U-Net or BBox mask ⛔ Skipping 00001278_003.png — missing U-Net or BBox mask ⛔ Skipping 00000193_018.png — missing U-Net or BBox mask ⛔ Skipping 00001156_002.png — missing U-Net or BBox mask ⛔ Skipping 00001237_000.png — missing U-Net or BBox mask ⛔ Skipping 00000177_001.png — missing U-Net or BBox mask ⛔ Skipping 00001106_000.png — missing U-Net or BBox mask ⛔ Skipping 00000857_008.png — missing U-Net or BBox mask ⛔ Skipping 00000443_000.png — missing U-Net or BBox mask ⛔ Skipping 00001301_025.png — missing U-Net or BBox mask ⛔ Skipping 00000573_003.png — missing U-Net or BBox mask ⛔ Skipping 00000127_009.png — missing U-Net or BBox mask ⛔ Skipping 00001220_001.png — missing U-Net or BBox mask ⛔ Skipping 00000099_002.png — missing U-Net or BBox mask ⛔ Skipping 00000468_021.png — missing U-Net or BBox mask ⛔ Skipping 00000011_005.png — missing U-Net or BBox mask ⛔ Skipping 00000506_008.png — missing U-Net or BBox mask ⛔ Skipping 00000138_005.png — missing U-Net or BBox mask ⛔ Skipping 00001200_008.png — missing U-Net or BBox mask ⛔ 
Skipping 00001113_000.png — missing U-Net or BBox mask ⛔ Skipping 00001155_000.png — missing U-Net or BBox mask ⛔ Skipping 00000459_045.png — missing U-Net or BBox mask ⛔ Skipping 00000431_003.png — missing U-Net or BBox mask ⛔ Skipping 00000138_004.png — missing U-Net or BBox mask ⛔ Skipping 00001085_000.png — missing U-Net or BBox mask ⛔ Skipping 00000211_025.png — missing U-Net or BBox mask ⛔ Skipping 00000963_027.png — missing U-Net or BBox mask ⛔ Skipping 00001093_003.png — missing U-Net or BBox mask ⛔ Skipping 00000180_000.png — missing U-Net or BBox mask ⛔ Skipping 00000583_010.png — missing U-Net or BBox mask ⛔ Skipping 00000723_000.png — missing U-Net or BBox mask ⛔ Skipping 00000569_001.png — missing U-Net or BBox mask ⛔ Skipping 00000150_003.png — missing U-Net or BBox mask ⛔ Skipping 00000490_004.png — missing U-Net or BBox mask ⛔ Skipping 00000963_017.png — missing U-Net or BBox mask ⛔ Skipping 00000583_044.png — missing U-Net or BBox mask ⛔ Skipping 00000500_009.png — missing U-Net or BBox mask ⛔ Skipping 00000607_000.png — missing U-Net or BBox mask ⛔ Skipping 00000877_028.png — missing U-Net or BBox mask ⛔ Skipping 00000744_004.png — missing U-Net or BBox mask ⛔ Skipping 00000317_001.png — missing U-Net or BBox mask ⛔ Skipping 00001143_002.png — missing U-Net or BBox mask ⛔ Skipping 00000914_001.png — missing U-Net or BBox mask ⛔ Skipping 00000251_002.png — missing U-Net or BBox mask ⛔ Skipping 00001301_047.png — missing U-Net or BBox mask ⛔ Skipping 00001312_000.png — missing U-Net or BBox mask ⛔ Skipping 00000540_001.png — missing U-Net or BBox mask ⛔ Skipping 00000766_009.png — missing U-Net or BBox mask ⛔ Skipping 00000859_002.png — missing U-Net or BBox mask ⛔ Skipping 00001245_001.png — missing U-Net or BBox mask ⛔ Skipping 00000906_000.png — missing U-Net or BBox mask ⛔ Skipping 00000766_008.png — missing U-Net or BBox mask ⛔ Skipping 00001313_000.png — missing U-Net or BBox mask ⛔ Skipping 00000997_003.png — missing U-Net or BBox mask ⛔ 
Skipping 00001132_001.png — missing U-Net or BBox mask ⛔ Skipping 00000368_009.png — missing U-Net or BBox mask ⛔ Skipping 00001101_011.png — missing U-Net or BBox mask ⛔ Skipping 00001067_000.png — missing U-Net or BBox mask ⛔ Skipping 00000268_000.png — missing U-Net or BBox mask ⛔ Skipping 00000403_001.png — missing U-Net or BBox mask ⛔ Skipping 00001203_015.png — missing U-Net or BBox mask ⛔ Skipping 00000013_014.png — missing U-Net or BBox mask ⛔ Skipping 00000646_004.png — missing U-Net or BBox mask ⛔ Skipping 00000655_002.png — missing U-Net or BBox mask ⛔ Skipping 00000782_001.png — missing U-Net or BBox mask ⛔ Skipping 00001203_005.png — missing U-Net or BBox mask ⛔ Skipping 00001284_000.png — missing U-Net or BBox mask ⛔ Skipping 00000545_002.png — missing U-Net or BBox mask ⛔ Skipping 00000652_003.png — missing U-Net or BBox mask ⛔ Skipping 00000116_020.png — missing U-Net or BBox mask ⛔ Skipping 00000261_002.png — missing U-Net or BBox mask ⛔ Skipping 00000402_004.png — missing U-Net or BBox mask ⛔ Skipping 00000577_003.png — missing U-Net or BBox mask ⛔ Skipping 00000118_000.png — missing U-Net or BBox mask ⛔ Skipping 00001200_017.png — missing U-Net or BBox mask ⛔ Skipping 00000013_030.png — missing U-Net or BBox mask ⛔ Skipping 00000254_000.png — missing U-Net or BBox mask ⛔ Skipping 00000890_002.png — missing U-Net or BBox mask ⛔ Skipping 00000794_001.png — missing U-Net or BBox mask ⛔ Skipping 00000827_001.png — missing U-Net or BBox mask ⛔ Skipping 00000870_012.png — missing U-Net or BBox mask ⛔ Skipping 00000820_018.png — missing U-Net or BBox mask ⛔ Skipping 00000495_000.png — missing U-Net or BBox mask ⛔ Skipping 00001256_004.png — missing U-Net or BBox mask ⛔ Skipping 00001278_014.png — missing U-Net or BBox mask ⛔ Skipping 00000032_018.png — missing U-Net or BBox mask ⛔ Skipping 00001190_003.png — missing U-Net or BBox mask ⛔ Skipping 00000154_001.png — missing U-Net or BBox mask ⛔ Skipping 00000709_001.png — missing U-Net or BBox mask ⛔ 
Skipping 00001227_000.png — missing U-Net or BBox mask ⛔ Skipping 00001170_033.png — missing U-Net or BBox mask ⛔ Skipping 00000246_012.png — missing U-Net or BBox mask ⛔ Skipping 00000090_004.png — missing U-Net or BBox mask ⛔ Skipping 00000193_009.png — missing U-Net or BBox mask ⛔ Skipping 00000155_000.png — missing U-Net or BBox mask ⛔ Skipping 00001170_040.png — missing U-Net or BBox mask ⛔ Skipping 00001278_007.png — missing U-Net or BBox mask ⛔ Skipping 00000385_000.png — missing U-Net or BBox mask ⛔ Skipping 00000927_001.png — missing U-Net or BBox mask ⛔ Skipping 00000590_000.png — missing U-Net or BBox mask ⛔ Skipping 00000695_000.png — missing U-Net or BBox mask ⛔ Skipping 00000419_000.png — missing U-Net or BBox mask ⛔ Skipping 00001335_000.png — missing U-Net or BBox mask ⛔ Skipping 00000887_003.png — missing U-Net or BBox mask ⛔ Skipping 00000980_004.png — missing U-Net or BBox mask ⛔ Skipping 00000047_007.png — missing U-Net or BBox mask ⛔ Skipping 00000963_025.png — missing U-Net or BBox mask ⛔ Skipping 00000500_003.png — missing U-Net or BBox mask ⛔ Skipping 00000036_000.png — missing U-Net or BBox mask ⛔ Skipping 00000830_001.png — missing U-Net or BBox mask ⛔ Skipping 00001248_033.png — missing U-Net or BBox mask ⛔ Skipping 00001255_020.png — missing U-Net or BBox mask ⛔ Skipping 00001203_007.png — missing U-Net or BBox mask ⛔ Skipping 00001238_002.png — missing U-Net or BBox mask ⛔ Skipping 00001169_001.png — missing U-Net or BBox mask ⛔ Skipping 00001170_038.png — missing U-Net or BBox mask ⛔ Skipping 00000898_001.png — missing U-Net or BBox mask ⛔ Skipping 00001006_011.png — missing U-Net or BBox mask ⛔ Skipping 00001045_001.png — missing U-Net or BBox mask ⛔ Skipping 00001070_001.png — missing U-Net or BBox mask ⛔ Skipping 00001013_003.png — missing U-Net or BBox mask ⛔ Skipping 00001082_001.png — missing U-Net or BBox mask ⛔ Skipping 00000517_005.png — missing U-Net or BBox mask ⛔ Skipping 00001250_009.png — missing U-Net or BBox mask ⛔ 
Skipping 00000181_049.png — missing U-Net or BBox mask ⛔ Skipping 00001043_001.png — missing U-Net or BBox mask ⛔ Skipping 00000573_008.png — missing U-Net or BBox mask ⛔ Skipping 00000648_001.png — missing U-Net or BBox mask ⛔ Skipping 00000152_000.png — missing U-Net or BBox mask ⛔ Skipping 00000583_035.png — missing U-Net or BBox mask ⛔ Skipping 00000032_038.png — missing U-Net or BBox mask ⛔ Skipping 00000242_001.png — missing U-Net or BBox mask ⛔ Skipping 00000598_000.png — missing U-Net or BBox mask ⛔ Skipping 00000438_000.png — missing U-Net or BBox mask ⛔ Skipping 00000631_002.png — missing U-Net or BBox mask ⛔ Skipping 00000456_000.png — missing U-Net or BBox mask ⛔ Skipping 00000118_007.png — missing U-Net or BBox mask ⛔ Skipping 00000032_006.png — missing U-Net or BBox mask ⛔ Skipping 00001199_007.png — missing U-Net or BBox mask ⛔ Skipping 00000288_001.png — missing U-Net or BBox mask ⛔ Skipping 00000443_003.png — missing U-Net or BBox mask ⛔ Skipping 00000964_000.png — missing U-Net or BBox mask ⛔ Skipping 00000459_051.png — missing U-Net or BBox mask ⛔ Skipping 00000491_011.png — missing U-Net or BBox mask ⛔ Skipping 00000766_012.png — missing U-Net or BBox mask ⛔ Skipping 00000372_002.png — missing U-Net or BBox mask ⛔ Skipping 00000942_002.png — missing U-Net or BBox mask ⛔ Skipping 00001014_000.png — missing U-Net or BBox mask ⛔ Skipping 00000040_000.png — missing U-Net or BBox mask
🎨 Grad-CAM Fusion (Test Set Only): 65%|██▌ | 487/750 [00:01<00:00, 349.65it/s]
⛔ Skipping 00001119_000.png — missing U-Net or BBox mask ⛔ Skipping 00000080_000.png — missing U-Net or BBox mask ⛔ Skipping 00000878_000.png — missing U-Net or BBox mask ⛔ Skipping 00000997_002.png — missing U-Net or BBox mask ⛔ Skipping 00000232_000.png — missing U-Net or BBox mask ⛔ Skipping 00000781_007.png — missing U-Net or BBox mask ⛔ Skipping 00000731_002.png — missing U-Net or BBox mask ⛔ Skipping 00000766_023.png — missing U-Net or BBox mask ⛔ Skipping 00000491_006.png — missing U-Net or BBox mask ⛔ Skipping 00000827_002.png — missing U-Net or BBox mask ⛔ Skipping 00000248_017.png — missing U-Net or BBox mask ⛔ Skipping 00000710_005.png — missing U-Net or BBox mask ⛔ Skipping 00001158_002.png — missing U-Net or BBox mask ⛔ Skipping 00001228_001.png — missing U-Net or BBox mask ⛔ Skipping 00000246_008.png — missing U-Net or BBox mask ⛔ Skipping 00001248_029.png — missing U-Net or BBox mask ⛔ Skipping 00000761_007.png — missing U-Net or BBox mask ⛔ Skipping 00000450_007.png — missing U-Net or BBox mask ⛔ Skipping 00000080_002.png — missing U-Net or BBox mask ⛔ Skipping 00000650_000.png — missing U-Net or BBox mask ⛔ Skipping 00000761_001.png — missing U-Net or BBox mask ⛔ Skipping 00000103_001.png — missing U-Net or BBox mask ⛔ Skipping 00000963_007.png — missing U-Net or BBox mask ⛔ Skipping 00001059_005.png — missing U-Net or BBox mask ⛔ Skipping 00000435_000.png — missing U-Net or BBox mask ⛔ Skipping 00000782_002.png — missing U-Net or BBox mask ⛔ Skipping 00000583_066.png — missing U-Net or BBox mask ⛔ Skipping 00000738_000.png — missing U-Net or BBox mask ⛔ Skipping 00000501_000.png — missing U-Net or BBox mask ⛔ Skipping 00001101_014.png — missing U-Net or BBox mask ⛔ Skipping 00001200_014.png — missing U-Net or BBox mask ⛔ Skipping 00001052_004.png — missing U-Net or BBox mask ⛔ Skipping 00000011_004.png — missing U-Net or BBox mask ⛔ Skipping 00000078_002.png — missing U-Net or BBox mask ⛔ Skipping 00000732_009.png — missing U-Net or BBox mask ⛔ 
Skipping 00000439_000.png — missing U-Net or BBox mask ⛔ Skipping 00001170_048.png — missing U-Net or BBox mask ⛔ Skipping 00000909_000.png — missing U-Net or BBox mask ⛔ Skipping 00001052_002.png — missing U-Net or BBox mask ⛔ Skipping 00001029_004.png — missing U-Net or BBox mask ⛔ Skipping 00000583_015.png — missing U-Net or BBox mask ⛔ Skipping 00000193_013.png — missing U-Net or BBox mask ⛔ Skipping 00001034_001.png — missing U-Net or BBox mask ⛔ Skipping 00000151_002.png — missing U-Net or BBox mask ⛔ Skipping 00000318_002.png — missing U-Net or BBox mask ⛔ Skipping 00000444_002.png — missing U-Net or BBox mask ⛔ Skipping 00000407_000.png — missing U-Net or BBox mask ⛔ Skipping 00001319_001.png — missing U-Net or BBox mask ⛔ Skipping 00000627_009.png — missing U-Net or BBox mask ⛔ Skipping 00000194_004.png — missing U-Net or BBox mask ⛔ Skipping 00000108_000.png — missing U-Net or BBox mask ⛔ Skipping 00000468_019.png — missing U-Net or BBox mask ⛔ Skipping 00001170_025.png — missing U-Net or BBox mask ⛔ Skipping 00000143_000.png — missing U-Net or BBox mask ⛔ Skipping 00000667_000.png — missing U-Net or BBox mask ⛔ Skipping 00001222_002.png — missing U-Net or BBox mask ⛔ Skipping 00000652_009.png — missing U-Net or BBox mask ⛔ Skipping 00001029_011.png — missing U-Net or BBox mask ⛔ Skipping 00000500_000.png — missing U-Net or BBox mask ⛔ Skipping 00000544_000.png — missing U-Net or BBox mask ⛔ Skipping 00000277_001.png — missing U-Net or BBox mask ⛔ Skipping 00001004_002.png — missing U-Net or BBox mask ⛔ Skipping 00000207_000.png — missing U-Net or BBox mask ⛔ Skipping 00001232_002.png — missing U-Net or BBox mask ⛔ Skipping 00000032_056.png — missing U-Net or BBox mask ⛔ Skipping 00000627_036.png — missing U-Net or BBox mask ⛔ Skipping 00000244_002.png — missing U-Net or BBox mask ⛔ Skipping 00001079_000.png — missing U-Net or BBox mask ⛔ Skipping 00000137_000.png — missing U-Net or BBox mask ⛔ Skipping 00000909_002.png — missing U-Net or BBox mask ⛔ 
Skipping 00000449_003.png — missing U-Net or BBox mask ⛔ Skipping 00000412_003.png — missing U-Net or BBox mask
🎨 Grad-CAM Fusion (Test Set Only): 75%|██▉ | 560/750 [00:02<00:00, 273.75it/s]
⛔ Skipping 00000827_015.png — missing U-Net or BBox mask ⛔ Skipping 00000632_007.png — missing U-Net or BBox mask ⛔ Skipping 00000744_003.png — missing U-Net or BBox mask ⛔ Skipping 00000646_007.png — missing U-Net or BBox mask ⛔ Skipping 00001108_003.png — missing U-Net or BBox mask ⛔ Skipping 00000506_024.png — missing U-Net or BBox mask ⛔ Skipping 00001194_006.png — missing U-Net or BBox mask ⛔ Skipping 00000005_004.png — missing U-Net or BBox mask ⛔ Skipping 00000772_000.png — missing U-Net or BBox mask ⛔ Skipping 00000321_000.png — missing U-Net or BBox mask ⛔ Skipping 00000181_027.png — missing U-Net or BBox mask ⛔ Skipping 00000035_001.png — missing U-Net or BBox mask ⛔ Skipping 00000284_004.png — missing U-Net or BBox mask ⛔ Skipping 00000980_005.png — missing U-Net or BBox mask ⛔ Skipping 00001301_002.png — missing U-Net or BBox mask ⛔ Skipping 00000773_001.png — missing U-Net or BBox mask ⛔ Skipping 00000492_001.png — missing U-Net or BBox mask ⛔ Skipping 00000170_000.png — missing U-Net or BBox mask ⛔ Skipping 00000417_001.png — missing U-Net or BBox mask ⛔ Skipping 00001141_000.png — missing U-Net or BBox mask ⛔ Skipping 00000032_052.png — missing U-Net or BBox mask ⛔ Skipping 00000798_010.png — missing U-Net or BBox mask ⛔ Skipping 00000491_002.png — missing U-Net or BBox mask ⛔ Skipping 00000963_008.png — missing U-Net or BBox mask ⛔ Skipping 00001029_001.png — missing U-Net or BBox mask ⛔ Skipping 00000444_000.png — missing U-Net or BBox mask ⛔ Skipping 00001248_018.png — missing U-Net or BBox mask ⛔ Skipping 00000856_000.png — missing U-Net or BBox mask ⛔ Skipping 00001333_001.png — missing U-Net or BBox mask ⛔ Skipping 00001074_002.png — missing U-Net or BBox mask ⛔ Skipping 00000583_003.png — missing U-Net or BBox mask ⛔ Skipping 00001082_005.png — missing U-Net or BBox mask ⛔ Skipping 00001257_000.png — missing U-Net or BBox mask ⛔ Skipping 00000621_010.png — missing U-Net or BBox mask ⛔ Skipping 00000494_003.png — missing U-Net or BBox mask ⛔ 
Skipping 00001222_004.png — missing U-Net or BBox mask ⛔ Skipping 00000118_003.png — missing U-Net or BBox mask ⛔ Skipping 00000244_001.png — missing U-Net or BBox mask ⛔ Skipping 00000248_026.png — missing U-Net or BBox mask ⛔ Skipping 00000479_000.png — missing U-Net or BBox mask ⛔ Skipping 00000218_006.png — missing U-Net or BBox mask ⛔ Skipping 00000752_000.png — missing U-Net or BBox mask ⛔ Skipping 00000044_002.png — missing U-Net or BBox mask ⛔ Skipping 00000489_001.png — missing U-Net or BBox mask ⛔ Skipping 00000293_000.png — missing U-Net or BBox mask ⛔ Skipping 00001122_018.png — missing U-Net or BBox mask ⛔ Skipping 00001151_005.png — missing U-Net or BBox mask ⛔ Skipping 00000013_007.png — missing U-Net or BBox mask ⛔ Skipping 00000758_003.png — missing U-Net or BBox mask ⛔ Skipping 00000902_004.png — missing U-Net or BBox mask ⛔ Skipping 00000770_001.png — missing U-Net or BBox mask ⛔ Skipping 00000652_016.png — missing U-Net or BBox mask ⛔ Skipping 00001253_000.png — missing U-Net or BBox mask ⛔ Skipping 00000588_002.png — missing U-Net or BBox mask ⛔ Skipping 00001098_000.png — missing U-Net or BBox mask ⛔ Skipping 00000372_016.png — missing U-Net or BBox mask ⛔ Skipping 00001140_001.png — missing U-Net or BBox mask ⛔ Skipping 00001225_000.png — missing U-Net or BBox mask ⛔ Skipping 00000202_001.png — missing U-Net or BBox mask ⛔ Skipping 00000775_000.png — missing U-Net or BBox mask ⛔ Skipping 00000218_002.png — missing U-Net or BBox mask ⛔ Skipping 00000937_003.png — missing U-Net or BBox mask ⛔ Skipping 00000193_004.png — missing U-Net or BBox mask ⛔ Skipping 00000545_004.png — missing U-Net or BBox mask ⛔ Skipping 00000591_011.png — missing U-Net or BBox mask ⛔ Skipping 00000711_002.png — missing U-Net or BBox mask ⛔ Skipping 00000231_009.png — missing U-Net or BBox mask ⛔ Skipping 00001276_000.png — missing U-Net or BBox mask ⛔ Skipping 00000683_002.png — missing U-Net or BBox mask ⛔ Skipping 00000798_018.png — missing U-Net or BBox mask ⛔ 
Skipping 00001248_035.png — missing U-Net or BBox mask ⛔ Skipping 00001255_018.png — missing U-Net or BBox mask ⛔ Skipping 00001298_004.png — missing U-Net or BBox mask ⛔ Skipping 00000339_000.png — missing U-Net or BBox mask ⛔ Skipping 00000348_002.png — missing U-Net or BBox mask ⛔ Skipping 00000261_004.png — missing U-Net or BBox mask ⛔ Skipping 00000339_001.png — missing U-Net or BBox mask ⛔ Skipping 00001059_001.png — missing U-Net or BBox mask ⛔ Skipping 00001095_000.png — missing U-Net or BBox mask ⛔ Skipping 00000623_006.png — missing U-Net or BBox mask ⛔ Skipping 00000725_000.png — missing U-Net or BBox mask ⛔ Skipping 00000766_025.png — missing U-Net or BBox mask ⛔ Skipping 00000127_005.png — missing U-Net or BBox mask ⛔ Skipping 00000634_007.png — missing U-Net or BBox mask ⛔ Skipping 00001039_007.png — missing U-Net or BBox mask ⛔ Skipping 00000662_008.png — missing U-Net or BBox mask ⛔ Skipping 00000032_060.png — missing U-Net or BBox mask ⛔ Skipping 00000061_025.png — missing U-Net or BBox mask ⛔ Skipping 00000133_001.png — missing U-Net or BBox mask ⛔ Skipping 00000798_003.png — missing U-Net or BBox mask ⛔ Skipping 00000009_000.png — missing U-Net or BBox mask ⛔ Skipping 00000391_008.png — missing U-Net or BBox mask
🎨 Grad-CAM Fusion (Test Set Only): 87%|███▍| 653/750 [00:02<00:00, 244.10it/s]
⛔ Skipping 00000032_043.png — missing U-Net or BBox mask ⛔ Skipping 00000221_000.png — missing U-Net or BBox mask ⛔ Skipping 00000824_000.png — missing U-Net or BBox mask ⛔ Skipping 00000116_012.png — missing U-Net or BBox mask ⛔ Skipping 00000877_020.png — missing U-Net or BBox mask ⛔ Skipping 00000211_013.png — missing U-Net or BBox mask ⛔ Skipping 00000390_000.png — missing U-Net or BBox mask ⛔ Skipping 00000393_000.png — missing U-Net or BBox mask ⛔ Skipping 00000181_028.png — missing U-Net or BBox mask ⛔ Skipping 00000459_024.png — missing U-Net or BBox mask ⛔ Skipping 00000033_000.png — missing U-Net or BBox mask ⛔ Skipping 00000655_000.png — missing U-Net or BBox mask ⛔ Skipping 00000181_021.png — missing U-Net or BBox mask ⛔ Skipping 00000632_000.png — missing U-Net or BBox mask ⛔ Skipping 00000092_000.png — missing U-Net or BBox mask ⛔ Skipping 00000273_007.png — missing U-Net or BBox mask ⛔ Skipping 00000743_002.png — missing U-Net or BBox mask ⛔ Skipping 00001157_004.png — missing U-Net or BBox mask ⛔ Skipping 00000652_008.png — missing U-Net or BBox mask ⛔ Skipping 00000211_031.png — missing U-Net or BBox mask ⛔ Skipping 00000013_027.png — missing U-Net or BBox mask ⛔ Skipping 00000372_012.png — missing U-Net or BBox mask ⛔ Skipping 00000062_000.png — missing U-Net or BBox mask ⛔ Skipping 00000862_000.png — missing U-Net or BBox mask ⛔ Skipping 00000591_004.png — missing U-Net or BBox mask ⛔ Skipping 00000181_023.png — missing U-Net or BBox mask ⛔ Skipping 00000277_000.png — missing U-Net or BBox mask ⛔ Skipping 00001075_039.png — missing U-Net or BBox mask ⛔ Skipping 00001210_000.png — missing U-Net or BBox mask ⛔ Skipping 00000808_011.png — missing U-Net or BBox mask ⛔ Skipping 00000394_000.png — missing U-Net or BBox mask ⛔ Skipping 00000715_001.png — missing U-Net or BBox mask ⛔ Skipping 00001272_000.png — missing U-Net or BBox mask ⛔ Skipping 00000352_000.png — missing U-Net or BBox mask ⛔ Skipping 00000075_000.png — missing U-Net or BBox mask ⛔ 
Skipping 00000459_000.png — missing U-Net or BBox mask ⛔ Skipping 00000046_000.png — missing U-Net or BBox mask ⛔ Skipping 00001122_016.png — missing U-Net or BBox mask ⛔ Skipping 00000720_003.png — missing U-Net or BBox mask ⛔ Skipping 00000591_001.png — missing U-Net or BBox mask ⛔ Skipping 00001208_004.png — missing U-Net or BBox mask ⛔ Skipping 00001132_000.png — missing U-Net or BBox mask ⛔ Skipping 00001120_002.png — missing U-Net or BBox mask ⛔ Skipping 00000766_026.png — missing U-Net or BBox mask ⛔ Skipping 00000583_026.png — missing U-Net or BBox mask
🎨 Grad-CAM Fusion (Test Set Only): 100%|████| 750/750 [00:03<00:00, 245.07it/s]
⛔ Skipping 00000499_002.png — missing U-Net or BBox mask ⛔ Skipping 00000672_000.png — missing U-Net or BBox mask ⛔ Skipping 00000511_000.png — missing U-Net or BBox mask ⛔ Skipping 00000893_000.png — missing U-Net or BBox mask ⛔ Skipping 00000412_001.png — missing U-Net or BBox mask ⛔ Skipping 00000032_008.png — missing U-Net or BBox mask ⛔ Skipping 00001034_006.png — missing U-Net or BBox mask ⛔ Skipping 00000261_003.png — missing U-Net or BBox mask ⛔ Skipping 00000825_003.png — missing U-Net or BBox mask ⛔ Skipping 00001203_014.png — missing U-Net or BBox mask ⛔ Skipping 00000370_004.png — missing U-Net or BBox mask ⛔ Skipping 00000877_011.png — missing U-Net or BBox mask ⛔ Skipping 00000194_000.png — missing U-Net or BBox mask ⛔ Skipping 00000467_001.png — missing U-Net or BBox mask ⛔ Skipping 00001055_007.png — missing U-Net or BBox mask ⛔ Skipping 00000801_000.png — missing U-Net or BBox mask ⛔ Skipping 00000490_001.png — missing U-Net or BBox mask ⛔ Skipping 00001267_000.png — missing U-Net or BBox mask ⛔ Skipping 00000980_000.png — missing U-Net or BBox mask ⛔ Skipping 00000459_043.png — missing U-Net or BBox mask ⛔ Skipping 00000704_001.png — missing U-Net or BBox mask ⛔ Skipping 00000727_007.png — missing U-Net or BBox mask ⛔ Skipping 00001301_018.png — missing U-Net or BBox mask ⛔ Skipping 00000662_009.png — missing U-Net or BBox mask ⛔ Skipping 00000545_005.png — missing U-Net or BBox mask ⛔ Skipping 00000040_001.png — missing U-Net or BBox mask ⛔ Skipping 00000172_000.png — missing U-Net or BBox mask ⛔ Skipping 00000252_001.png — missing U-Net or BBox mask ⛔ Skipping 00001226_001.png — missing U-Net or BBox mask ⛔ Skipping 00000360_000.png — missing U-Net or BBox mask ⛔ Skipping 00000520_000.png — missing U-Net or BBox mask ⛔ Skipping 00000143_010.png — missing U-Net or BBox mask ⛔ Skipping 00001093_001.png — missing U-Net or BBox mask ⛔ Skipping 00000995_000.png — missing U-Net or BBox mask ⛔ Skipping 00000291_000.png — missing U-Net or BBox mask ⛔ 
Skipping 00001250_001.png — missing U-Net or BBox mask ⛔ Skipping 00000702_003.png — missing U-Net or BBox mask ⛔ Skipping 00001255_002.png — missing U-Net or BBox mask ⛔ Skipping 00000683_000.png — missing U-Net or BBox mask ⛔ Skipping 00000732_008.png — missing U-Net or BBox mask ⛔ Skipping 00000459_027.png — missing U-Net or BBox mask ⛔ Skipping 00000894_002.png — missing U-Net or BBox mask ⛔ Skipping 00000632_011.png — missing U-Net or BBox mask ⛔ Skipping 00001007_000.png — missing U-Net or BBox mask ⛔ Skipping 00000547_002.png — missing U-Net or BBox mask ⛔ Skipping 00000146_000.png — missing U-Net or BBox mask ⛔ Skipping 00000741_007.png — missing U-Net or BBox mask ⛔ Skipping 00000034_001.png — missing U-Net or BBox mask ⛔ Skipping 00000103_007.png — missing U-Net or BBox mask ⛔ Skipping 00000963_024.png — missing U-Net or BBox mask ⛔ Skipping 00001278_001.png — missing U-Net or BBox mask ✅ All Grad-CAM overlays (test set) saved to checkpoints/overlays/
In [ ]:
# master loading block
In [2]:
import os
import torch
import pickle
import numpy as np
# ♻️ Clean GPU memory
def clean_gpu():
    """Free accumulated GPU and host memory between heavy notebook steps.

    Runs Python garbage collection *first* so that tensors released by the
    collector are actually returned to CUDA's caching allocator when
    ``empty_cache()`` runs afterwards — the original order (empty_cache
    before gc.collect) missed anything freed by the collection pass.
    """
    import gc
    gc.collect()
    # empty_cache() is only meaningful (and only safe to rely on) when a
    # CUDA device is present; guard so the helper is a no-op on CPU-only runs.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
# 📦 Master checkpoint loader
def load_all_checkpoints():
# Images
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
# Image names
with open("checkpoints/image_names.pkl", "rb") as f:
image_names = pickle.load(f)
# Split indices
with open("checkpoints/splits.pkl", "rb") as f:
splits = pickle.load(f)
train_idx, val_idx, test_idx = splits["train_idx"], splits["val_idx"], splits["test_idx"]
# Traditional features
with open("checkpoints/traditional_features/train_traditional_features.pkl", "rb") as f:
train_traditional_features = pickle.load(f)
with open("checkpoints/traditional_features/val_traditional_features.pkl", "rb") as f:
val_traditional_features = pickle.load(f)
with open("checkpoints/traditional_features/test_traditional_features.pkl", "rb") as f:
test_traditional_features = pickle.load(f)
# CNN features
train_cnn_features = torch.load("checkpoints/cnn_features/train_cnn_features.pt")
val_cnn_features = torch.load("checkpoints/cnn_features/val_cnn_features.pt")
test_cnn_features = torch.load("checkpoints/cnn_features/test_cnn_features.pt")
# Hybrid features
hybrid_train = np.load("checkpoints/hybrid_features/hybrid_train.npy")
hybrid_val = np.load("checkpoints/hybrid_features/hybrid_val.npy")
hybrid_test = np.load("checkpoints/hybrid_features/hybrid_test.npy")
# Filtered labels dataframe
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
filtered_labels_df = pickle.load(f)
# Predictions
test_preds = np.load("checkpoints/preds/test_preds.npy")
val_preds = np.load("checkpoints/preds/val_preds.npy") if os.path.exists("checkpoints/preds/val_preds.npy") else None
clean_gpu()
print("✅ All checkpoints loaded successfully.")
return {
"processed_images_tensor": processed_images_tensor,
"image_names": image_names,
"train_idx": train_idx,
"val_idx": val_idx,
"test_idx": test_idx,
"train_traditional_features": train_traditional_features,
"val_traditional_features": val_traditional_features,
"test_traditional_features": test_traditional_features,
"train_cnn_features": train_cnn_features,
"val_cnn_features": val_cnn_features,
"test_cnn_features": test_cnn_features,
"hybrid_train": hybrid_train,
"hybrid_val": hybrid_val,
"hybrid_test": hybrid_test,
"filtered_labels_df": filtered_labels_df,
"test_preds": test_preds,
"val_preds": val_preds
}
In [2]:
# 🔁 Load everything in one line
# NOTE(review): requires the cell above (which defines load_all_checkpoints)
# to have been executed first in this kernel session.
checkpoints = load_all_checkpoints()
/tmp/ipykernel_4720/1402946010.py:15: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
/tmp/ipykernel_4720/1402946010.py:35: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
train_cnn_features = torch.load("checkpoints/cnn_features/train_cnn_features.pt")
/tmp/ipykernel_4720/1402946010.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
val_cnn_features = torch.load("checkpoints/cnn_features/val_cnn_features.pt")
/tmp/ipykernel_4720/1402946010.py:37: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
test_cnn_features = torch.load("checkpoints/cnn_features/test_cnn_features.pt")
✅ All checkpoints loaded successfully.
In [3]:
# Full MLP Fine-Tuning Search Script
In [ ]:
# Fine-Tuned MLP Evaluation Block (Test Set)
In [2]:
import torch
import numpy as np
from sklearn.metrics import f1_score, hamming_loss, roc_auc_score
import json
import os
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
# ♻️ Clean GPU + RAM
def clean_gpu():
    """Free accumulated GPU and host memory between heavy notebook steps.

    Collects garbage *before* ``empty_cache()`` so that tensors released
    by the collector are actually handed back to CUDA's caching allocator
    (the original reversed order missed them). Re-defined here so this
    evaluation cell runs on a fresh kernel without the earlier loader cell.
    """
    import gc
    gc.collect()
    # No-op guard for CPU-only environments.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
# 🧠 MLP Model (must match fine-tuned MLP)
# 🧠 MLP Model (must match fine-tuned MLP)
class MLPClassifier(nn.Module):
    """Multi-label MLP head applied to the hybrid feature vectors.

    Architecture must match the fine-tuned checkpoint loaded below:
    one (Linear -> ReLU -> Dropout) stack per hidden layer, then a final
    Linear producing raw logits — sigmoid is applied by the caller.

    Parameters
    ----------
    input_dim : int
        Dimensionality of the input feature vector.
    output_dim : int
        Number of labels (one logit per label).
    hidden_layers : sequence of int, optional
        Hidden layer widths. A tuple default replaces the original
        mutable-list default ``[1024, 512]`` (shared-mutable-default
        pitfall); callers may still pass lists.
    dropout : float, optional
        Dropout probability applied after each hidden layer.
    """

    def __init__(self, input_dim, output_dim, hidden_layers=(1024, 512), dropout=0.3):
        super().__init__()
        layers = []
        last_dim = input_dim
        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(last_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            last_dim = hidden_dim
        layers.append(nn.Linear(last_dim, output_dim))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return raw (pre-sigmoid) logits of shape ``(batch, output_dim)``."""
        return self.net(x)
# 📦 Load hybrid features and targets
checkpoints = load_all_checkpoints()
X_test = checkpoints["hybrid_test"]
y_test = checkpoints["filtered_labels_df"]
# Reuse the split indices already returned by load_all_checkpoints()
# instead of re-reading splits.pkl: the original re-read used `pickle`,
# which this cell never imports — it only worked because an earlier cell
# had imported it (hidden-state bug on a fresh kernel).
test_idx = checkpoints["test_idx"]

# Binarize the pipe-separated multi-label "Finding Labels" column over the
# full frame, then slice out the test rows so columns line up with training.
from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted(set(label for labels in y_test["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(y_test["Finding Labels"].str.split("|"))
y_test_targets = targets[test_idx]

# 📦 Torch dataset + DataLoader (small batch=8 keeps GPU memory low)
test_ds = TensorDataset(torch.tensor(X_test, dtype=torch.float32),
                        torch.tensor(y_test_targets, dtype=torch.float32))
test_loader = DataLoader(test_ds, batch_size=8, shuffle=False)

# 🎯 Load fine-tuned model (GPU if available); architecture params must
# match the checkpoint exactly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPClassifier(input_dim=X_test.shape[1],
                      output_dim=y_test_targets.shape[1],
                      hidden_layers=[1024, 512],
                      dropout=0.3).to(device)
model.load_state_dict(torch.load("checkpoints/models/mlp_classifier_finetuned.pt", map_location=device))
model.eval()

# 📊 Predict safely in small batches
preds = []
true = []
with torch.no_grad():
    for xb, yb in test_loader:
        xb, yb = xb.to(device), yb.to(device)
        out = torch.sigmoid(model(xb))
        preds.append(out.cpu().numpy())
        true.append(yb.cpu().numpy())
preds = np.vstack(preds)
true = np.vstack(true)
pred_labels = (preds >= 0.5).astype(int)

# 📈 Calculate metrics
micro_f1 = f1_score(true, pred_labels, average="micro", zero_division=0)
macro_f1 = f1_score(true, pred_labels, average="macro", zero_division=0)
hamming = hamming_loss(true, pred_labels)
try:
    auc = roc_auc_score(true, preds, average="macro")
except ValueError:
    # roc_auc_score raises ValueError when some label column has a single
    # class in `true`; the original bare `except:` also hid real bugs
    # (typos, shape mismatches), so only this expected case is swallowed.
    auc = None

print("\n📊 Fine-Tuned MLP Test Metrics:")
print(f"Micro-F1 : {micro_f1:.4f}")
print(f"Macro-F1 : {macro_f1:.4f}")
print(f"Hamming Loss : {hamming:.4f}")
if auc is not None:
    print(f"Macro-AUC : {auc:.4f}")

# 💾 Save predictions
os.makedirs("checkpoints/preds", exist_ok=True)
np.save("checkpoints/preds/test_preds_finetuned.npy", pred_labels)

# 💾 Save metrics
metrics = {
    "micro_f1": micro_f1,
    "macro_f1": macro_f1,
    "hamming_loss": hamming,
    "macro_auc": auc
}
with open("checkpoints/metrics_finetuned.json", "w") as f:
    json.dump(metrics, f, indent=4)

print("\n✅ Fine-tuned predictions and metrics saved safely!")
clean_gpu()
/tmp/ipykernel_4902/1402946010.py:15: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
/tmp/ipykernel_4902/1402946010.py:35: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
train_cnn_features = torch.load("checkpoints/cnn_features/train_cnn_features.pt")
/tmp/ipykernel_4902/1402946010.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
val_cnn_features = torch.load("checkpoints/cnn_features/val_cnn_features.pt")
/tmp/ipykernel_4902/1402946010.py:37: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
test_cnn_features = torch.load("checkpoints/cnn_features/test_cnn_features.pt")
✅ All checkpoints loaded successfully.
/tmp/ipykernel_4902/713552589.py:56: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
model.load_state_dict(torch.load("checkpoints/models/mlp_classifier_finetuned.pt", map_location=device))
📊 Fine-Tuned MLP Test Metrics: Micro-F1 : 0.4979 Macro-F1 : 0.0482 Hamming Loss : 0.0762 Macro-AUC : 0.5000 ✅ Fine-tuned predictions and metrics saved safely!
In [ ]:
# Deeper MLP
In [2]:
import torch
import torch.nn as nn
# 🧠 Deeper MLP Model (4 hidden layers)
class DeeperMLPClassifier(nn.Module):
def __init__(self, input_dim, output_dim, dropout=0.4):
super().__init__()
self.net = nn.Sequential(
nn.Linear(input_dim, 2048),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(2048, 1024),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(1024, 512),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(512, 256),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(256, output_dim)
)
def forward(self, x):
return self.net(x)
In [3]:
import torch
import torch.optim as optim
import numpy as np
from sklearn.metrics import f1_score, hamming_loss
from torch.utils.data import DataLoader, TensorDataset
import gc
import os
# ♻️ Clean GPU + RAM
def clean_gpu():
    """Empty the CUDA caching allocator and run Python garbage collection."""
    torch.cuda.empty_cache()
    gc.collect()

# 📦 Load hybrid features and targets
checkpoints = load_all_checkpoints()
X_train = checkpoints["hybrid_train"]
X_val = checkpoints["hybrid_val"]
y_train = checkpoints["filtered_labels_df"]

with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
train_idx = splits["train_idx"]
val_idx = splits["val_idx"]

from sklearn.preprocessing import MultiLabelBinarizer

# Multi-hot targets from the pipe-delimited "Finding Labels" strings.
all_labels = sorted(set(label for labels in y_train["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(y_train["Finding Labels"].str.split("|"))
y_train_targets = targets[train_idx]
y_val_targets = targets[val_idx]

# 🧠 Create datasets
train_ds = TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                         torch.tensor(y_train_targets, dtype=torch.float32))
val_ds = TensorDataset(torch.tensor(X_val, dtype=torch.float32),
                       torch.tensor(y_val_targets, dtype=torch.float32))

# 🎯 Load DeeperMLP model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DeeperMLPClassifier(input_dim=X_train.shape[1], output_dim=y_train_targets.shape[1], dropout=0.4).to(device)

# 🔧 Setup training
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()
# NOTE(review): batch_size=1 is extremely slow and yields very noisy
# gradients — presumably chosen for GPU memory limits; confirm before raising.
train_loader = DataLoader(train_ds, batch_size=1, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=1, shuffle=False)

epochs = 10
best_micro_f1 = 0
best_model_state = None

# 🔁 Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_train_loss = total_loss / len(train_loader)

    # 🎯 Evaluate on val set after each epoch
    model.eval()
    preds, true = [], []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            out = torch.sigmoid(model(xb))
            preds.append(out.cpu().numpy())
            true.append(yb.cpu().numpy())
    preds = np.vstack(preds)
    true = np.vstack(true)
    pred_labels = (preds >= 0.5).astype(int)

    micro_f1 = f1_score(true, pred_labels, average="micro", zero_division=0)
    macro_f1 = f1_score(true, pred_labels, average="macro", zero_division=0)
    print(f"📈 Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.4f} | Val Micro-F1: {micro_f1:.4f} | Val Macro-F1: {macro_f1:.4f}")

    # Save best model
    if micro_f1 > best_micro_f1:
        best_micro_f1 = micro_f1
        # BUG FIX: state_dict() returns *references* to the live parameter
        # tensors, so later epochs would silently overwrite this "best"
        # snapshot in place and the file saved below would hold the *last*
        # epoch's weights. Clone every tensor to freeze the best epoch.
        best_model_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    clean_gpu()

# ✅ Save best model
os.makedirs("checkpoints/models", exist_ok=True)
torch.save(best_model_state, "checkpoints/models/deeper_mlp_classifier.pt")
print(f"\n✅ Deeper MLP fine-tuning complete. Best Micro-F1: {best_micro_f1:.4f}")
print("✅ Model saved to checkpoints/models/deeper_mlp_classifier.pt")
/tmp/ipykernel_6581/1402946010.py:15: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
/tmp/ipykernel_6581/1402946010.py:35: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
train_cnn_features = torch.load("checkpoints/cnn_features/train_cnn_features.pt")
/tmp/ipykernel_6581/1402946010.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
val_cnn_features = torch.load("checkpoints/cnn_features/val_cnn_features.pt")
/tmp/ipykernel_6581/1402946010.py:37: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
test_cnn_features = torch.load("checkpoints/cnn_features/test_cnn_features.pt")
✅ All checkpoints loaded successfully. 📈 Epoch 1/10 | Train Loss: 0.9050 | Val Micro-F1: 0.4874 | Val Macro-F1: 0.0468 📈 Epoch 2/10 | Train Loss: 1.1085 | Val Micro-F1: 0.4865 | Val Macro-F1: 0.0468 📈 Epoch 3/10 | Train Loss: 1.1190 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 📈 Epoch 4/10 | Train Loss: 0.9270 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 📈 Epoch 5/10 | Train Loss: 0.3341 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 📈 Epoch 6/10 | Train Loss: 0.3321 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 📈 Epoch 7/10 | Train Loss: 0.2419 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 📈 Epoch 8/10 | Train Loss: 0.5452 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 📈 Epoch 9/10 | Train Loss: 1.1056 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 📈 Epoch 10/10 | Train Loss: 0.5484 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 ✅ Deeper MLP fine-tuning complete. Best Micro-F1: 0.4877 ✅ Model saved to checkpoints/models/deeper_mlp_classifier.pt
In [1]:
import torch
import torch.nn as nn
# 🧠 Deeper but Safer MLP Model
class DeeperMLP_Shrunk(nn.Module):
def __init__(self, input_dim, output_dim, dropout=0.4):
super().__init__()
self.net = nn.Sequential(
nn.Linear(input_dim, 512), # 🔥 Shrink first big layer
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(512, 256),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(256, 128),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(128, output_dim) # final output (15 labels)
)
def forward(self, x):
return self.net(x)
In [1]:
import torch
import numpy as np
from sklearn.metrics import f1_score, hamming_loss, roc_auc_score
import json
import os
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
# ♻️ Clean memory
def clean_gpu():
    """Free cached CUDA blocks, then trigger a Python garbage-collection pass."""
    import gc
    torch.cuda.empty_cache()
    gc.collect()
# 🧠 Safer Deeper MLP model
class DeeperMLP_Shrunk(nn.Module):
def __init__(self, input_dim, output_dim, dropout=0.4):
super().__init__()
self.net = nn.Sequential(
nn.Linear(input_dim, 512),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(512, 256),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(256, 128),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(128, output_dim)
)
def forward(self, x):
return self.net(x)
# 📦 Load checkpoints
checkpoints = load_all_checkpoints()
X_test = checkpoints["hybrid_test"]
y_test = checkpoints["filtered_labels_df"]

with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
test_idx = splits["test_idx"]

from sklearn.preprocessing import MultiLabelBinarizer

# Multi-hot targets from the pipe-delimited "Finding Labels" strings.
all_labels = sorted(set(label for labels in y_test["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(y_test["Finding Labels"].str.split("|"))
y_test_targets = targets[test_idx]

# 📦 Create dataset and loader
test_ds = TensorDataset(torch.tensor(X_test, dtype=torch.float32),
                        torch.tensor(y_test_targets, dtype=torch.float32))
test_loader = DataLoader(test_ds, batch_size=8, shuffle=False)  # 🔥 Batch size 8 now

# 🎯 Load trained DeeperMLP_Shrunk model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DeeperMLP_Shrunk(input_dim=X_test.shape[1], output_dim=y_test_targets.shape[1], dropout=0.4).to(device)

# 👇 Note: Make sure you've trained and saved this shrunk model separately beforehand.
# For now, assuming model is freshly trained or load checkpoint if exists.
if os.path.exists("checkpoints/models/deeper_mlp_shrunk.pt"):
    # weights_only=True: the file holds only a state_dict, so restrict
    # unpickling to tensors (safer; silences torch.load's FutureWarning).
    model.load_state_dict(torch.load("checkpoints/models/deeper_mlp_shrunk.pt",
                                     map_location=device, weights_only=True))
    print("✅ Loaded saved DeeperMLP_Shrunk model weights.")
else:
    print("⚠️ No saved DeeperMLP_Shrunk model found — this will just evaluate fresh weights.")
model.eval()

# 📊 Predict
preds, true = [], []
with torch.no_grad():
    for xb, yb in test_loader:
        xb, yb = xb.to(device), yb.to(device)
        out = torch.sigmoid(model(xb))
        preds.append(out.cpu().numpy())
        true.append(yb.cpu().numpy())
preds = np.vstack(preds)
true = np.vstack(true)
pred_labels = (preds >= 0.5).astype(int)

# 📈 Calculate metrics
micro_f1 = f1_score(true, pred_labels, average="micro", zero_division=0)
macro_f1 = f1_score(true, pred_labels, average="macro", zero_division=0)
hamming = hamming_loss(true, pred_labels)
try:
    auc = roc_auc_score(true, preds, average="macro")
except ValueError:
    # roc_auc_score raises ValueError when a class has only one label value
    # in `true`; a bare `except:` would also mask unrelated bugs.
    auc = None

print("\n📊 DeeperMLP_Shrunk Test Metrics:")
print(f"Micro-F1 : {micro_f1:.4f}")
print(f"Macro-F1 : {macro_f1:.4f}")
print(f"Hamming Loss : {hamming:.4f}")
if auc is not None:
    print(f"Macro-AUC : {auc:.4f}")

# 💾 Save predictions
os.makedirs("checkpoints/preds", exist_ok=True)
np.save("checkpoints/preds/test_preds_deeper_mlp_shrunk.npy", pred_labels)

# 💾 Save metrics (plain floats so json can serialize numpy scalars)
metrics = {
    "micro_f1": float(micro_f1),
    "macro_f1": float(macro_f1),
    "hamming_loss": float(hamming),
    "macro_auc": float(auc) if auc is not None else None,
}
with open("checkpoints/metrics_deeper_mlp_shrunk.json", "w") as f:
    json.dump(metrics, f, indent=4)

print("\n✅ DeeperMLP_Shrunk predictions and metrics saved safely!")
clean_gpu()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[1], line 39 36 return self.net(x) 38 # 📦 Load checkpoints ---> 39 checkpoints = load_all_checkpoints() 41 X_test = checkpoints["hybrid_test"] 43 y_test = checkpoints["filtered_labels_df"] NameError: name 'load_all_checkpoints' is not defined
In [2]:
import torch
import numpy as np
from sklearn.metrics import f1_score, hamming_loss, roc_auc_score
import json
import os
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
from tqdm import tqdm # 🆕 Progress bar
# ♻️ Clean memory
def clean_gpu():
    """Empty the CUDA caching allocator and garbage-collect Python objects."""
    torch.cuda.empty_cache()
    import gc
    gc.collect()
# 🧠 DeeperMLP_Shrunk model
class DeeperMLP_Shrunk(nn.Module):
def __init__(self, input_dim, output_dim, dropout=0.4):
super().__init__()
self.net = nn.Sequential(
nn.Linear(input_dim, 512),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(512, 256),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(256, 128),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(128, output_dim)
)
def forward(self, x):
return self.net(x)
# 📦 Load checkpoints
checkpoints = load_all_checkpoints()
X_test = checkpoints["hybrid_test"]
y_test = checkpoints["filtered_labels_df"]

with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
test_idx = splits["test_idx"]

from sklearn.preprocessing import MultiLabelBinarizer

# Multi-hot targets from the pipe-delimited "Finding Labels" strings.
all_labels = sorted(set(label for labels in y_test["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(y_test["Finding Labels"].str.split("|"))
y_test_targets = targets[test_idx]

# 📦 Create dataset and loader (batch_size=1)
test_ds = TensorDataset(torch.tensor(X_test, dtype=torch.float32),
                        torch.tensor(y_test_targets, dtype=torch.float32))
test_loader = DataLoader(test_ds, batch_size=1, shuffle=False)

# 🎯 Load model on CPU
device = torch.device("cpu")  # Force CPU!
model = DeeperMLP_Shrunk(input_dim=X_test.shape[1], output_dim=y_test_targets.shape[1], dropout=0.4).to(device)
# weights_only=True: the file holds only a state_dict, so restrict unpickling
# to tensors (safer, and silences torch.load's FutureWarning seen in output).
model.load_state_dict(torch.load("checkpoints/models/deeper_mlp_shrunk.pt",
                                 map_location=device, weights_only=True))
model.eval()

# 📊 Predict safely
preds, true = [], []
with torch.no_grad():
    test_progress = tqdm(test_loader, desc="🔮 Evaluating on Test Set (Batch 1, CPU)", leave=True)
    for xb, yb in test_progress:
        xb, yb = xb.to(device), yb.to(device)
        out = torch.sigmoid(model(xb))
        preds.append(out.numpy())
        true.append(yb.numpy())

preds = np.vstack(preds)
true = np.vstack(true)
pred_labels = (preds >= 0.5).astype(int)

# 📈 Calculate metrics
micro_f1 = f1_score(true, pred_labels, average="micro", zero_division=0)
macro_f1 = f1_score(true, pred_labels, average="macro", zero_division=0)
hamming = hamming_loss(true, pred_labels)
try:
    auc = roc_auc_score(true, preds, average="macro")
except ValueError:
    # roc_auc_score raises ValueError when a class has only one label value
    # in `true`; catching only that keeps real bugs visible.
    auc = None

print("\n📊 DeeperMLP_Shrunk Test Metrics:")
print(f"Micro-F1 : {micro_f1:.4f}")
print(f"Macro-F1 : {macro_f1:.4f}")
print(f"Hamming Loss : {hamming:.4f}")
if auc is not None:
    print(f"Macro-AUC : {auc:.4f}")

# 💾 Save predictions
os.makedirs("checkpoints/preds", exist_ok=True)
np.save("checkpoints/preds/test_preds_deeper_mlp_shrunk.npy", pred_labels)

# 💾 Save metrics (plain floats so json can serialize numpy scalars)
metrics = {
    "micro_f1": float(micro_f1),
    "macro_f1": float(macro_f1),
    "hamming_loss": float(hamming),
    "macro_auc": float(auc) if auc is not None else None,
}
with open("checkpoints/metrics_deeper_mlp_shrunk.json", "w") as f:
    json.dump(metrics, f, indent=4)

print("\n✅ DeeperMLP_Shrunk predictions and metrics saved safely!")
clean_gpu()
/tmp/ipykernel_15115/1402946010.py:15: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
/tmp/ipykernel_15115/1402946010.py:35: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
train_cnn_features = torch.load("checkpoints/cnn_features/train_cnn_features.pt")
/tmp/ipykernel_15115/1402946010.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
val_cnn_features = torch.load("checkpoints/cnn_features/val_cnn_features.pt")
/tmp/ipykernel_15115/1402946010.py:37: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
test_cnn_features = torch.load("checkpoints/cnn_features/test_cnn_features.pt")
✅ All checkpoints loaded successfully.
/tmp/ipykernel_15115/720763397.py:61: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
model.load_state_dict(torch.load("checkpoints/models/deeper_mlp_shrunk.pt", map_location=device))
🔮 Evaluating on Test Set (Batch 1, CPU): 100%|█| 750/750 [00:05<00:00, 134.01it
📊 DeeperMLP_Shrunk Test Metrics: Micro-F1 : 0.4979 Macro-F1 : 0.0482 Hamming Loss : 0.0762 Macro-AUC : 0.5000 ✅ DeeperMLP_Shrunk predictions and metrics saved safely!
In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import os
import pickle
# ♻️ Clean memory
def clean_gpu():
    """Release unused cached GPU memory, then run the Python garbage collector."""
    import gc as _gc
    torch.cuda.empty_cache()
    _gc.collect()
# 📦 Load preprocessed images and labels
# weights_only=True: the checkpoint is a plain tensor, so restrict the
# unpickler (addresses the FutureWarning torch emits for the old default).
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt", weights_only=True)

with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
train_idx = splits["train_idx"]
val_idx = splits["val_idx"]

# 📦 Multi-label binarization
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score  # hoisted out of the epoch loop

all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
y_train_targets = targets[train_idx]
y_val_targets = targets[val_idx]

# 📦 Prepare datasets
train_ds = TensorDataset(processed_images_tensor[train_idx], torch.tensor(y_train_targets, dtype=torch.float32))
val_ds = TensorDataset(processed_images_tensor[val_idx], torch.tensor(y_val_targets, dtype=torch.float32))
train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=8, shuffle=False)

# 🎯 Load pre-trained ResNet18 and MODIFY first layer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
num_ftrs = resnet.fc.in_features

# 🔥 Modify first convolution to accept 1-channel input.
# NOTE: this discards the pretrained 3-channel conv1 weights (re-initialized).
resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
# 🔥 Replace final fully connected layer: one logit per label.
resnet.fc = nn.Linear(num_ftrs, len(all_labels))

# Freeze all layers except layer4 and fc
for name, param in resnet.named_parameters():
    if "layer4" not in name and "fc" not in name:
        param.requires_grad = False
resnet = resnet.to(device)

# 🔧 Loss and optimizer (only the unfrozen parameters go to Adam)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, resnet.parameters()), lr=1e-4)

# 📈 Training loop
epochs = 5
best_val_micro_f1 = 0
best_model_state = None

for epoch in range(epochs):
    resnet.train()
    running_loss = 0
    train_progress = tqdm(train_loader, desc=f"🚀 Epoch {epoch+1}/{epochs} Training", leave=False)
    for xb, yb in train_progress:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = resnet(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_progress.set_postfix(loss=loss.item())
    avg_train_loss = running_loss / len(train_loader)

    # 🧪 Validation
    resnet.eval()
    preds, true = [], []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            out = torch.sigmoid(resnet(xb))
            preds.append(out.cpu().numpy())
            true.append(yb.cpu().numpy())
    preds = np.vstack(preds)
    true = np.vstack(true)
    pred_labels = (preds >= 0.5).astype(int)

    micro_f1 = f1_score(true, pred_labels, average="micro", zero_division=0)
    macro_f1 = f1_score(true, pred_labels, average="macro", zero_division=0)
    print(f"📈 Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.4f} | Val Micro-F1: {micro_f1:.4f} | Val Macro-F1: {macro_f1:.4f}")

    if micro_f1 > best_val_micro_f1:
        best_val_micro_f1 = micro_f1
        # BUG FIX: state_dict() returns references to the live tensors, so
        # later epochs would overwrite this "best" snapshot in place and the
        # file saved below would actually hold the *last* epoch's weights.
        best_model_state = {k: v.detach().cpu().clone() for k, v in resnet.state_dict().items()}
    clean_gpu()

# ✅ Save best fine-tuned ResNet18
os.makedirs("checkpoints/models", exist_ok=True)
torch.save(best_model_state, "checkpoints/models/fine_tuned_resnet18.pt")
print(f"\n✅ Fine-tuned ResNet18 complete. Best Val Micro-F1: {best_val_micro_f1:.4f}")
print("✅ Model saved to checkpoints/models/fine_tuned_resnet18.pt")
/tmp/ipykernel_15115/2571352222.py:17: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
📈 Epoch 1/5 | Train Loss: 0.2296 | Val Micro-F1: 0.3973 | Val Macro-F1: 0.0516
📈 Epoch 2/5 | Train Loss: 0.1665 | Val Micro-F1: 0.4111 | Val Macro-F1: 0.0736
📈 Epoch 3/5 | Train Loss: 0.1131 | Val Micro-F1: 0.4212 | Val Macro-F1: 0.0876
📈 Epoch 4/5 | Train Loss: 0.0638 | Val Micro-F1: 0.3915 | Val Macro-F1: 0.1203
📈 Epoch 5/5 | Train Loss: 0.0365 | Val Micro-F1: 0.3608 | Val Macro-F1: 0.1161 ✅ Fine-tuned ResNet18 complete. Best Val Micro-F1: 0.4212 ✅ Model saved to checkpoints/models/fine_tuned_resnet18.pt
In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import os
# ♻️ Clean memory
def clean_gpu():
    """Free Python garbage, then release cached CUDA memory to the driver.

    Fix: gc.collect() now runs BEFORE empty_cache(), so CUDA blocks held by
    just-collected tensors are actually returned (the previous order emptied
    the cache first, leaving freshly collected blocks cached until the next
    call). empty_cache() is a no-op when CUDA is uninitialized/unavailable.

    NOTE(review): this helper is re-defined identically in several cells —
    consider defining it once next to the imports.
    """
    import gc
    gc.collect()
    torch.cuda.empty_cache()
# 🧠 Rebuild the fine-tuned ResNet18 and turn it into a 512-d feature extractor
import pickle  # used for splits.pkl below; was not imported in this cell

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# `models` comes from `from torchvision import models` in the setup cell
resnet = models.resnet18(weights=None)  # fresh architecture, no pretrained weights
num_ftrs = resnet.fc.in_features

# 🔥 First conv accepts 1-channel (grayscale X-ray) input instead of RGB
resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
# FC head must match the 15-output checkpoint (discarded for feature extraction)
resnet.fc = nn.Linear(num_ftrs, 15)

# Load fine-tuned weights. weights_only=True restricts unpickling to tensors,
# fixing the FutureWarning and removing the arbitrary-code-execution risk.
resnet.load_state_dict(
    torch.load("checkpoints/models/fine_tuned_resnet18.pt",
               map_location=device, weights_only=True)
)
resnet = resnet.to(device)

# 🔥 Drop the FC head: keep everything up to and including global average pooling
feature_extractor = nn.Sequential(*list(resnet.children())[:-1])
feature_extractor = feature_extractor.to(device)
feature_extractor.eval()  # freeze dropout / batch-norm statistics for inference

# 📦 Load preprocessed images (plain tensor file → weights_only=True is safe)
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt", weights_only=True)
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)  # NOTE: pickle is unsafe on untrusted files
train_idx = splits["train_idx"]
val_idx = splits["val_idx"]
test_idx = splits["test_idx"]

# 📦 Full-dataset loader; shuffle=False keeps rows aligned with the split indices
full_ds = TensorDataset(processed_images_tensor)
full_dl = DataLoader(full_ds, batch_size=8, shuffle=False)

# 🔮 Extract CNN features batch by batch
features_list = []
with torch.no_grad():  # inference only — no autograd bookkeeping
    feature_progress = tqdm(full_dl, desc="🔍 Extracting CNN Features", leave=True)
    for xb in feature_progress:
        xb = xb[0].to(device)                  # TensorDataset yields 1-tuples
        feats = feature_extractor(xb)          # (batch, 512, 1, 1) after GAP
        feats = feats.view(feats.size(0), -1)  # flatten to (batch, 512)
        features_list.append(feats.cpu())      # keep results on CPU to free VRAM
cnn_features_tensor = torch.cat(features_list, dim=0)  # (N, 512)
clean_gpu()
print(f"\n✅ CNN Feature Extraction Complete. Shape: {cnn_features_tensor.shape}")

# 📦 Save CNN features per split (indexing preserves label alignment)
os.makedirs("checkpoints/cnn_features", exist_ok=True)
torch.save(cnn_features_tensor[train_idx], "checkpoints/cnn_features/train_cnn_features_finetuned.pt")
torch.save(cnn_features_tensor[val_idx], "checkpoints/cnn_features/val_cnn_features_finetuned.pt")
torch.save(cnn_features_tensor[test_idx], "checkpoints/cnn_features/test_cnn_features_finetuned.pt")
print("✅ Fine-tuned CNN features saved for train/val/test!")
/tmp/ipykernel_15115/947636788.py:25: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
resnet.load_state_dict(torch.load("checkpoints/models/fine_tuned_resnet18.pt", map_location=device))
/tmp/ipykernel_15115/947636788.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
🔍 Extracting CNN Features: 100%|█████████████| 625/625 [00:06<00:00, 93.73it/s]
✅ CNN Feature Extraction Complete. Shape: torch.Size([4999, 512]) ✅ Fine-tuned CNN features saved for train/val/test!
In [ ]:
# SAFE Hybrid Feature Building (Batch by Batch)
In [3]:
import torch
import pickle
import os
from tqdm import tqdm
# 📦 Load traditional (hand-crafted) features for each split
# NOTE: pickle.load is unsafe on untrusted files — these are project-generated.
with open("checkpoints/traditional_features/train_traditional_features.pkl", "rb") as f:
    train_trad_feats = pickle.load(f)
with open("checkpoints/traditional_features/val_traditional_features.pkl", "rb") as f:
    val_trad_feats = pickle.load(f)
with open("checkpoints/traditional_features/test_traditional_features.pkl", "rb") as f:
    test_trad_feats = pickle.load(f)

# 📦 Load fine-tuned CNN features. weights_only=True restricts unpickling to
# tensors, fixing the FutureWarning shown in this cell's original output.
train_cnn_feats = torch.load("checkpoints/cnn_features/train_cnn_features_finetuned.pt", weights_only=True)
val_cnn_feats = torch.load("checkpoints/cnn_features/val_cnn_features_finetuned.pt", weights_only=True)
test_cnn_feats = torch.load("checkpoints/cnn_features/test_cnn_features_finetuned.pt", weights_only=True)

# 🛡️ Prepare save folder for the fused features
os.makedirs("checkpoints/hybrid_features_finetuned", exist_ok=True)
# 🔗 Fuse traditional + CNN features for one split and persist to disk
def build_and_save_hybrid(trad_feats, cnn_feats, split_name):
    """Concatenate per-sample traditional and CNN features, save as one tensor.

    Args:
        trad_feats: sequence of per-sample traditional feature vectors
            (all the same length), convertible to float tensors.
        cnn_feats: float32 torch.Tensor of shape (N, D_cnn), row-aligned
            with trad_feats.
        split_name: "train" / "val" / "test" — used in the output filename.

    Writes a (N, D_trad + D_cnn) float32 tensor to
    checkpoints/hybrid_features_finetuned/{split_name}_hybrid_finetuned.pt.

    Fix: the original built one tensor per sample in a Python loop and then
    stacked; a single stack + cat along dim=1 produces the identical tensor
    with far less per-item overhead (and no tqdm dependency).
    """
    trad_tensor = torch.stack(
        [torch.as_tensor(feat, dtype=torch.float32) for feat in trad_feats]
    )
    hybrid_tensor = torch.cat((trad_tensor, cnn_feats), dim=1)
    out_path = f"checkpoints/hybrid_features_finetuned/{split_name}_hybrid_finetuned.pt"
    torch.save(hybrid_tensor, out_path)
    print(f"✅ Saved {split_name} hybrid features with shape: {hybrid_tensor.shape}")
# 🔄 Build safely batch-by-batch
# One call per split; each writes its own .pt file under
# checkpoints/hybrid_features_finetuned/ (directory created above).
build_and_save_hybrid(train_trad_feats, train_cnn_feats, "train")
build_and_save_hybrid(val_trad_feats, val_cnn_feats, "val")
build_and_save_hybrid(test_trad_feats, test_cnn_feats, "test")
/tmp/ipykernel_4499/4287786768.py:15: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
train_cnn_feats = torch.load("checkpoints/cnn_features/train_cnn_features_finetuned.pt")
/tmp/ipykernel_4499/4287786768.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
val_cnn_feats = torch.load("checkpoints/cnn_features/val_cnn_features_finetuned.pt")
/tmp/ipykernel_4499/4287786768.py:17: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
test_cnn_feats = torch.load("checkpoints/cnn_features/test_cnn_features_finetuned.pt")
🔄 Building train Hybrid Features: 100%|██| 3499/3499 [00:00<00:00, 4154.69it/s]
✅ Saved train hybrid features with shape: torch.Size([3499, 106948])
🔄 Building val Hybrid Features: 100%|██████| 750/750 [00:00<00:00, 7595.62it/s]
✅ Saved val hybrid features with shape: torch.Size([750, 106948])
🔄 Building test Hybrid Features: 100%|█████| 750/750 [00:00<00:00, 6321.06it/s]
✅ Saved test hybrid features with shape: torch.Size([750, 106948])
In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, hamming_loss
from tqdm import tqdm
import os
import json
# ♻️ Clean memory
def clean_gpu():
    """Collect Python garbage, then release cached CUDA memory.

    Fix: gc.collect() must precede torch.cuda.empty_cache(); otherwise
    blocks owned by tensors freed during collection stay cached until the
    next call. empty_cache() is a no-op when CUDA was never initialized.
    """
    import gc
    gc.collect()
    torch.cuda.empty_cache()
# 🧠 Fully-connected multi-label classification head
class MLPClassifier(nn.Module):
    """Feed-forward classifier: Linear→ReLU→Dropout blocks plus an output layer.

    Args:
        input_dim: size of the flat input feature vector.
        output_dim: number of labels; forward() returns raw logits, so pair
            with BCEWithLogitsLoss (or apply sigmoid at inference time).
        hidden_layers: widths of the hidden layers, applied in order.
        dropout: dropout probability after every hidden activation.
    """
    def __init__(self, input_dim, output_dim, hidden_layers=[512, 256, 128], dropout=0.4):
        super().__init__()
        dims = [input_dim] + list(hidden_layers)
        blocks = []
        for in_dim, out_dim in zip(dims[:-1], dims[1:]):
            blocks += [nn.Linear(in_dim, out_dim), nn.ReLU(), nn.Dropout(dropout)]
        blocks.append(nn.Linear(dims[-1], output_dim))
        self.net = nn.Sequential(*blocks)

    def forward(self, x):
        # Raw logits — no sigmoid here; the loss/inference code applies it.
        return self.net(x)
# 📦 Load hybrid features (plain tensors → weights_only=True is safe and
# fixes the torch.load FutureWarning visible in this cell's original output)
train_hybrid = torch.load("checkpoints/hybrid_features_finetuned/train_hybrid_finetuned.pt", weights_only=True)
val_hybrid = torch.load("checkpoints/hybrid_features_finetuned/val_hybrid_finetuned.pt", weights_only=True)
test_hybrid = torch.load("checkpoints/hybrid_features_finetuned/test_hybrid_finetuned.pt", weights_only=True)

# 📦 Load labels and the frozen train/val/test split
import pickle       # was not imported in this cell — added so the cell stands alone
import numpy as np  # np.vstack is used in the validation loop below

with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)  # NOTE: pickle is unsafe on untrusted files
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
train_idx = splits["train_idx"]
val_idx = splits["val_idx"]
test_idx = splits["test_idx"]

# 🎯 Multi-hot targets: one column per finding label, sorted for a stable order
from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
y_train_targets = targets[train_idx]
y_val_targets = targets[val_idx]
y_test_targets = targets[test_idx]

# 📦 Datasets and loaders
train_ds = TensorDataset(train_hybrid, torch.tensor(y_train_targets, dtype=torch.float32))
val_ds = TensorDataset(val_hybrid, torch.tensor(y_val_targets, dtype=torch.float32))
train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=8, shuffle=False)

# 🎯 Train the MLP on the hybrid features
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPClassifier(input_dim=train_hybrid.shape[1], output_dim=y_train_targets.shape[1]).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()  # multi-label: sigmoid + BCE on raw logits

epochs = 10
best_micro_f1 = 0
best_model_state = None

for epoch in range(epochs):
    model.train()
    running_loss = 0
    train_progress = tqdm(train_loader, desc=f"🚀 Epoch {epoch+1}/{epochs} Training", leave=False)
    for xb, yb in train_progress:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_progress.set_postfix(loss=loss.item())
    avg_train_loss = running_loss / len(train_loader)

    # 🧪 Validation: threshold sigmoid probabilities at 0.5
    model.eval()
    preds = []
    true = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            out = torch.sigmoid(model(xb))
            preds.append(out.cpu().numpy())
            true.append(yb.cpu().numpy())
    preds = np.vstack(preds)
    true = np.vstack(true)
    pred_labels = (preds >= 0.5).astype(int)
    micro_f1 = f1_score(true, pred_labels, average="micro", zero_division=0)
    macro_f1 = f1_score(true, pred_labels, average="macro", zero_division=0)
    print(f"📈 Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.4f} | Val Micro-F1: {micro_f1:.4f} | Val Macro-F1: {macro_f1:.4f}")

    # Keep the state_dict of the best epoch by Val Micro-F1
    if micro_f1 > best_micro_f1:
        best_micro_f1 = micro_f1
        best_model_state = model.state_dict()
    clean_gpu()

# ✅ Save best model
os.makedirs("checkpoints/models", exist_ok=True)
torch.save(best_model_state, "checkpoints/models/mlp_classifier_finetuned_hybrid.pt")
print(f"\n✅ MLP retraining on hybrid features complete. Best Val Micro-F1: {best_micro_f1:.4f}")
print("✅ Model saved to checkpoints/models/mlp_classifier_finetuned_hybrid.pt")
/tmp/ipykernel_4499/3427197727.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
train_hybrid = torch.load("checkpoints/hybrid_features_finetuned/train_hybrid_finetuned.pt")
/tmp/ipykernel_4499/3427197727.py:35: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
val_hybrid = torch.load("checkpoints/hybrid_features_finetuned/val_hybrid_finetuned.pt")
/tmp/ipykernel_4499/3427197727.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
test_hybrid = torch.load("checkpoints/hybrid_features_finetuned/test_hybrid_finetuned.pt")
📈 Epoch 1/10 | Train Loss: 1.5236 | Val Micro-F1: 0.4835 | Val Macro-F1: 0.0468
📈 Epoch 2/10 | Train Loss: 0.3956 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468
📈 Epoch 3/10 | Train Loss: 0.3488 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468
📈 Epoch 4/10 | Train Loss: 0.3217 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468
📈 Epoch 5/10 | Train Loss: 0.2799 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468
📈 Epoch 6/10 | Train Loss: 0.3110 | Val Micro-F1: 0.4710 | Val Macro-F1: 0.0458
📈 Epoch 7/10 | Train Loss: 0.2433 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468
📈 Epoch 8/10 | Train Loss: 0.2405 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468
📈 Epoch 9/10 | Train Loss: 0.2252 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468
📈 Epoch 10/10 | Train Loss: 0.2323 | Val Micro-F1: 0.4877 | Val Macro-F1: 0.0468 ✅ MLP retraining on hybrid features complete. Best Val Micro-F1: 0.4877 ✅ Model saved to checkpoints/models/mlp_classifier_finetuned_hybrid.pt
In [5]:
import torch
import numpy as np
from sklearn.metrics import f1_score, hamming_loss, roc_auc_score
import json
import os
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
from tqdm import tqdm
# ♻️ Clean memory
def clean_gpu():
    """Run garbage collection, then free cached CUDA memory.

    Fix: collect BEFORE emptying the cache so blocks released by the
    collector are actually returned to the driver; the original order left
    them cached. empty_cache() is a safe no-op without an initialized GPU.
    """
    import gc
    gc.collect()
    torch.cuda.empty_cache()
# 🧠 MLP model class (architecture must match the trained checkpoint exactly)
class MLPClassifier(nn.Module):
    """Stacked Linear→ReLU→Dropout hidden blocks followed by a logits layer.

    Args:
        input_dim: flat input feature size.
        output_dim: number of labels; forward() emits raw logits.
        hidden_layers: hidden widths, applied in order.
        dropout: dropout probability after each hidden activation.
    """
    def __init__(self, input_dim, output_dim, hidden_layers=[512, 256, 128], dropout=0.4):
        super().__init__()
        widths = [input_dim, *hidden_layers]
        stages = []
        for prev, nxt in zip(widths, widths[1:]):
            stages.extend((nn.Linear(prev, nxt), nn.ReLU(), nn.Dropout(dropout)))
        stages.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        # Logits only; evaluation code applies torch.sigmoid itself.
        return self.net(x)
# 📦 Load hybrid test features (plain tensor → weights_only=True is safe and
# fixes the FutureWarning shown in this cell's original output)
test_hybrid = torch.load("checkpoints/hybrid_features_finetuned/test_hybrid_finetuned.pt", weights_only=True)

# 📦 Load test labels
import pickle  # was not imported in this cell — added so the cell stands alone
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)  # NOTE: pickle is unsafe on untrusted files
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
test_idx = splits["test_idx"]

from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
y_test_targets = targets[test_idx]

# 📦 Test dataset and loader (shuffle=False keeps rows aligned with targets)
test_ds = TensorDataset(test_hybrid, torch.tensor(y_test_targets, dtype=torch.float32))
test_loader = DataLoader(test_ds, batch_size=8, shuffle=False)

# 🎯 Load trained model (state_dict of tensors → weights_only=True is safe)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPClassifier(input_dim=test_hybrid.shape[1], output_dim=y_test_targets.shape[1]).to(device)
model.load_state_dict(torch.load("checkpoints/models/mlp_classifier_finetuned_hybrid.pt",
                                 map_location=device, weights_only=True))
model.eval()

# 📊 Predict batch by batch
preds = []
true = []
with torch.no_grad():
    test_progress = tqdm(test_loader, desc="🔮 Evaluating Final MLP on Hybrid Features", leave=True)
    for xb, yb in test_progress:
        xb, yb = xb.to(device), yb.to(device)
        out = torch.sigmoid(model(xb))
        preds.append(out.cpu().numpy())
        true.append(yb.cpu().numpy())
preds = np.vstack(preds)
true = np.vstack(true)
pred_labels = (preds >= 0.5).astype(int)

# 📈 Metrics
micro_f1 = f1_score(true, pred_labels, average="micro", zero_division=0)
macro_f1 = f1_score(true, pred_labels, average="macro", zero_division=0)
hamming = hamming_loss(true, pred_labels)
try:
    auc = roc_auc_score(true, preds, average="macro")
except ValueError:
    # roc_auc_score raises ValueError when a class has only one label value
    # in the test split. The original bare `except:` would also have hidden
    # real programming errors — catch only the documented failure mode.
    auc = None

print("\n📊 Final MLP (Hybrid Features) Test Metrics:")
print(f"Micro-F1 : {micro_f1:.4f}")
print(f"Macro-F1 : {macro_f1:.4f}")
print(f"Hamming Loss : {hamming:.4f}")
if auc is not None:
    print(f"Macro-AUC : {auc:.4f}")

# 💾 Save predictions
os.makedirs("checkpoints/preds", exist_ok=True)
np.save("checkpoints/preds/test_preds_mlp_hybrid_finetuned.npy", pred_labels)

# 💾 Save metrics (explicit float() so json never sees numpy scalar types)
metrics = {
    "micro_f1": float(micro_f1),
    "macro_f1": float(macro_f1),
    "hamming_loss": float(hamming),
    "macro_auc": float(auc) if auc is not None else None
}
with open("checkpoints/metrics_mlp_hybrid_finetuned.json", "w") as f:
    json.dump(metrics, f, indent=4)
print("\n✅ Final MLP predictions and metrics saved safely!")
clean_gpu()
/tmp/ipykernel_4499/2713941923.py:34: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
test_hybrid = torch.load("checkpoints/hybrid_features_finetuned/test_hybrid_finetuned.pt")
/tmp/ipykernel_4499/2713941923.py:59: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
model.load_state_dict(torch.load("checkpoints/models/mlp_classifier_finetuned_hybrid.pt", map_location=device))
🔮 Evaluating Final MLP on Hybrid Features: 100%|█| 94/94 [00:00<00:00, 342.14it
📊 Final MLP (Hybrid Features) Test Metrics: Micro-F1 : 0.4979 Macro-F1 : 0.0482 Hamming Loss : 0.0762 Macro-AUC : 0.5005 ✅ Final MLP predictions and metrics saved safely!
In [6]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import random
import pickle
# 📦 Load processed images (plain tensor → weights_only=True is safe and
# fixes the FutureWarning shown in this cell's original output)
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt", weights_only=True)

# 📦 Load test indices and labels
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)  # NOTE: pickle is unsafe on untrusted files
test_idx = splits["test_idx"]
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)

from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
y_test_targets = targets[test_idx]

# 📦 Load the saved binary predictions
pred_labels = np.load("checkpoints/preds/test_preds_mlp_hybrid_finetuned.npy")

# 📦 Show a handful of random test samples with true vs predicted labels.
# NOTE(review): random.sample is unseeded, so the displayed samples differ on
# every run — seed `random` in the config cell if reproducibility matters.
samples_to_show = 5
random_indices = random.sample(range(len(test_idx)), samples_to_show)
for idx in random_indices:
    image = processed_images_tensor[test_idx[idx]].squeeze(0)  # drop channel dim
    true_labels = np.array(all_labels)[y_test_targets[idx]==1]
    predicted_labels = np.array(all_labels)[pred_labels[idx]==1]
    plt.figure(figsize=(4,4))
    plt.imshow(image.cpu(), cmap='gray')
    plt.axis('off')
    plt.title(f"True: {', '.join(true_labels)}\nPred: {', '.join(predicted_labels)}")
    plt.show()
/tmp/ipykernel_4499/2404941175.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
In [7]:
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
import numpy as np
import pickle
# 📦 Load predictions and rebuild the ground-truth targets.
# Fix: the original loaded the *predictions* into a variable named `y_true`,
# which was dangerously misleading (the f1_score call below passes targets
# first and predictions second, so the math was right — only the name lied).
y_pred = np.load("checkpoints/preds/test_preds_mlp_hybrid_finetuned.npy")
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)  # NOTE: pickle is unsafe on untrusted files
test_idx = splits["test_idx"]
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)

from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
y_test_targets = targets[test_idx]

# 📈 Per-class F1: average=None returns one score per label column
per_class_f1 = f1_score(y_test_targets, y_pred, average=None, zero_division=0)

# 📊 Plotting
plt.figure(figsize=(12,6))
plt.barh(all_labels, per_class_f1, color='skyblue')
plt.xlabel('F1 Score')
plt.title('Per-Class F1 Scores on Test Set')
plt.xlim(0, 1)
plt.grid(True)
plt.show()
In [8]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import pickle
# 📦 Load processed images (plain tensor → weights_only=True is safe and
# fixes the FutureWarning shown in this cell's original output)
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt", weights_only=True)

# 📦 Load splits and labels
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)  # NOTE: pickle is unsafe on untrusted files
test_idx = splits["test_idx"]
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)

from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted(set(label for labels in filtered_labels_df["Finding Labels"] for label in labels.split("|")))
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
y_test_targets = targets[test_idx]

# 📦 Load binary predictions
y_pred = np.load("checkpoints/preds/test_preds_mlp_hybrid_finetuned.npy")

# 📦 Per-sample error count = Hamming distance between target and prediction rows
errors = np.sum(np.abs(y_test_targets - y_pred), axis=1)

# 📦 Top 5 samples with the most wrong labels (negate for descending argsort)
worst_indices = np.argsort(-errors)[:5]

# 📦 Visualize each worst sample with its true / predicted / wrong label sets
for idx in worst_indices:
    img = processed_images_tensor[test_idx[idx]].squeeze(0)  # drop channel dim
    true_labels = np.array(all_labels)[y_test_targets[idx]==1]
    predicted_labels = np.array(all_labels)[y_pred[idx]==1]
    wrong_labels = np.array(all_labels)[y_test_targets[idx] != y_pred[idx]]
    plt.figure(figsize=(4,4))
    plt.imshow(img.cpu(), cmap='gray')
    plt.axis('off')
    plt.title(f"Errors: {errors[idx]} wrong\nTrue: {', '.join(true_labels)}\nPred: {', '.join(predicted_labels)}\nWrong: {', '.join(wrong_labels)}")
    plt.show()
/tmp/ipykernel_4499/1404732579.py:7: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
processed_images_tensor = torch.load("checkpoints/processed_images_tensor.pt")
In [10]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
# 📦 Rebuild the multi-hot test targets from the saved split and label table
with open("checkpoints/splits.pkl", "rb") as f:
    splits = pickle.load(f)
test_idx = splits["test_idx"]
with open("checkpoints/filtered_labels_df.pkl", "rb") as f:
    filtered_labels_df = pickle.load(f)

from sklearn.preprocessing import MultiLabelBinarizer
all_labels = sorted({label
                     for labels in filtered_labels_df["Finding Labels"]
                     for label in labels.split("|")})
mlb = MultiLabelBinarizer(classes=all_labels)
targets = mlb.fit_transform(filtered_labels_df["Finding Labels"].str.split("|"))
y_test_targets = targets[test_idx]

# 📦 Load the saved binary predictions
y_pred = np.load("checkpoints/preds/test_preds_mlp_hybrid_finetuned.npy")

# 📦 A false negative is a label that is present (1) but was predicted 0
false_negatives = (y_pred == 0) & (y_test_targets == 1)
fn_counts = false_negatives.sum(axis=0)  # per-class totals

# 📊 Horizontal bar chart of per-class false-negative counts
plt.figure(figsize=(12,6))
plt.barh(all_labels, fn_counts, color='lightcoral')
plt.xlabel('False Negative Count')
plt.title('Per-Class False Negatives on Test Set')
plt.grid(True)
plt.show()
In [11]:
import matplotlib.pyplot as plt
import numpy as np
# 📊 Final test-set metrics for the four model variants
models = ['Base MLP', 'Fine-Tuned MLP', 'DeeperMLP_Shrunk', 'Final Hybrid MLP']
micro_f1 = [0.4877, 0.4979, 0.4979, 0.4979]
macro_f1 = [0.0468, 0.0482, 0.0482, 0.0482]
hamming = [0.0758, 0.0762, 0.0762, 0.0762]
macro_auc = [0.4984, 0.5000, 0.5000, 0.5005]

x = np.arange(len(models))
width = 0.2

# 📈 Grouped bars: one cluster per model, one bar per metric.
# Offsets of -1.5w .. +1.5w center the 4-bar cluster on each tick.
fig, ax = plt.subplots(figsize=(12,6))
metric_series = [('Micro-F1', micro_f1), ('Macro-F1', macro_f1),
                 ('Hamming Loss', hamming), ('Macro-AUC', macro_auc)]
for slot, (label, values) in enumerate(metric_series):
    ax.bar(x + (slot - 1.5) * width, values, width, label=label)

ax.set_ylabel('Score')
ax.set_title('Model Comparison (Test Metrics)')
ax.set_xticks(x)
ax.set_xticklabels(models, rotation=15)
ax.legend()
ax.grid(True)
plt.tight_layout()
plt.show()
In [ ]: